pugixml.cpp 339 KB


  1. /**
  2. * pugixml parser - version 1.14
  3. * --------------------------------------------------------
  4. * Copyright (C) 2006-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  5. * Report bugs and download new versions at https://pugixml.org/
  6. *
  7. * This library is distributed under the MIT License. See notice at the end
  8. * of this file.
  9. *
  10. * This work is based on the pugxml parser, which is:
  11. * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
  12. */
  13. #ifndef SOURCE_PUGIXML_CPP
  14. #define SOURCE_PUGIXML_CPP
  15. #include "pugixml.hpp"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include <limits.h>
  21. #ifdef PUGIXML_WCHAR_MODE
  22. # include <wchar.h>
  23. #endif
  24. #ifndef PUGIXML_NO_XPATH
  25. # include <math.h>
  26. # include <float.h>
  27. #endif
  28. #ifndef PUGIXML_NO_STL
  29. # include <istream>
  30. # include <ostream>
  31. # include <string>
  32. #endif
  33. // For placement new
  34. #include <new>
  35. // For load_file
  36. #if defined(__linux__) || defined(__APPLE__)
  37. #include <sys/stat.h>
  38. #endif
  39. #ifdef _MSC_VER
  40. # pragma warning(push)
  41. # pragma warning(disable: 4127) // conditional expression is constant
  42. # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
  43. # pragma warning(disable: 4702) // unreachable code
  44. # pragma warning(disable: 4996) // this function or variable may be unsafe
  45. #endif
  46. #if defined(_MSC_VER) && defined(__c2__)
  47. # pragma clang diagnostic push
  48. # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
  49. #endif
  50. #ifdef __INTEL_COMPILER
  51. # pragma warning(disable: 177) // function was declared but never referenced
  52. # pragma warning(disable: 279) // controlling expression is constant
  53. # pragma warning(disable: 1478 1786) // function was declared "deprecated"
  54. # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
  55. #endif
  56. #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
  57. # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
  58. #endif
  59. #ifdef __BORLANDC__
  60. # pragma option push
  61. # pragma warn -8008 // condition is always false
  62. # pragma warn -8066 // unreachable code
  63. #endif
  64. #ifdef __SNC__
  65. // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
  66. # pragma diag_suppress=178 // function was declared but never referenced
  67. # pragma diag_suppress=237 // controlling expression is constant
  68. #endif
  69. #ifdef __TI_COMPILER_VERSION__
  70. # pragma diag_suppress 179 // function was declared but never referenced
  71. #endif
  72. // Inlining controls
  73. #if defined(_MSC_VER) && _MSC_VER >= 1300
  74. # define PUGI_IMPL_NO_INLINE __declspec(noinline)
  75. #elif defined(__GNUC__)
  76. # define PUGI_IMPL_NO_INLINE __attribute__((noinline))
  77. #else
  78. # define PUGI_IMPL_NO_INLINE
  79. #endif
  80. // Branch weight controls
  81. #if defined(__GNUC__) && !defined(__c2__)
  82. # define PUGI_IMPL_UNLIKELY(cond) __builtin_expect(cond, 0)
  83. #else
  84. # define PUGI_IMPL_UNLIKELY(cond) (cond)
  85. #endif
  86. // Simple static assertion
  87. #define PUGI_IMPL_STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
  88. // Digital Mars C++ bug workaround for passing char loaded from memory via stack
  89. #ifdef __DMC__
  90. # define PUGI_IMPL_DMC_VOLATILE volatile
  91. #else
  92. # define PUGI_IMPL_DMC_VOLATILE
  93. #endif
  94. // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
  95. #if defined(__clang__) && defined(__has_attribute)
  96. # if __has_attribute(no_sanitize)
  97. # define PUGI_IMPL_UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
  98. # else
  99. # define PUGI_IMPL_UNSIGNED_OVERFLOW
  100. # endif
  101. #else
  102. # define PUGI_IMPL_UNSIGNED_OVERFLOW
  103. #endif
  104. // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
  105. #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
  106. using std::memcpy;
  107. using std::memmove;
  108. using std::memset;
  109. #endif
  110. // Old versions of GCC do not define ::malloc and ::free depending on header include order
  111. #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
  112. using std::malloc;
  113. using std::free;
  114. #endif
  115. // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
  116. #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
  117. # define LLONG_MIN (-LLONG_MAX - 1LL)
  118. # define LLONG_MAX __LONG_LONG_MAX__
  119. # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
  120. #endif
  121. // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
  122. #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE)
  123. # define PUGI_IMPL_MSVC_CRT_VERSION _MSC_VER
  124. #elif defined(_WIN32_WCE)
  125. # define PUGI_IMPL_MSVC_CRT_VERSION 1310 // MSVC7.1
  126. #endif
  127. // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
  128. #if __cplusplus >= 201103
  129. # define PUGI_IMPL_SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
  130. #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
  131. # define PUGI_IMPL_SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
  132. #elif defined(__APPLE__) && __clang_major__ >= 14 // Xcode 14 marks sprintf as deprecated while still using C++98 by default
  133. # define PUGI_IMPL_SNPRINTF(buf, fmt, arg1, arg2) snprintf(buf, sizeof(buf), fmt, arg1, arg2)
  134. #else
  135. # define PUGI_IMPL_SNPRINTF sprintf
  136. #endif
  137. // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
  138. #ifdef PUGIXML_HEADER_ONLY
  139. # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl {
  140. # define PUGI_IMPL_NS_END } }
  141. # define PUGI_IMPL_FN inline
  142. # define PUGI_IMPL_FN_NO_INLINE inline
  143. #else
  144. # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
  145. # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl {
  146. # define PUGI_IMPL_NS_END } }
  147. # else
  148. # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl { namespace {
  149. # define PUGI_IMPL_NS_END } } }
  150. # endif
  151. # define PUGI_IMPL_FN
  152. # define PUGI_IMPL_FN_NO_INLINE PUGI_IMPL_NO_INLINE
  153. #endif
  154. // uintptr_t
  155. #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
  156. namespace pugi
  157. {
  158. # ifndef _UINTPTR_T_DEFINED
  159. typedef size_t uintptr_t;
  160. # endif
  161. typedef unsigned __int8 uint8_t;
  162. typedef unsigned __int16 uint16_t;
  163. typedef unsigned __int32 uint32_t;
  164. }
  165. #else
  166. # include <stdint.h>
  167. #endif
  168. // Memory allocation
  169. PUGI_IMPL_NS_BEGIN
  170. PUGI_IMPL_FN void* default_allocate(size_t size)
  171. {
  172. return malloc(size);
  173. }
  174. PUGI_IMPL_FN void default_deallocate(void* ptr)
  175. {
  176. free(ptr);
  177. }
  178. template <typename T>
  179. struct xml_memory_management_function_storage
  180. {
  181. static allocation_function allocate;
  182. static deallocation_function deallocate;
  183. };
  184. // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
  185. // Without a template<> we'll get multiple definitions of the same static
  186. template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
  187. template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
  188. typedef xml_memory_management_function_storage<int> xml_memory;
  189. PUGI_IMPL_NS_END
  190. // String utilities
  191. PUGI_IMPL_NS_BEGIN
  192. // Get string length
  193. PUGI_IMPL_FN size_t strlength(const char_t* s)
  194. {
  195. assert(s);
  196. #ifdef PUGIXML_WCHAR_MODE
  197. return wcslen(s);
  198. #else
  199. return strlen(s);
  200. #endif
  201. }
  202. // Compare two strings
  203. PUGI_IMPL_FN bool strequal(const char_t* src, const char_t* dst)
  204. {
  205. assert(src && dst);
  206. #ifdef PUGIXML_WCHAR_MODE
  207. return wcscmp(src, dst) == 0;
  208. #else
  209. return strcmp(src, dst) == 0;
  210. #endif
  211. }
  212. // Compare lhs with [rhs_begin, rhs_end)
  213. PUGI_IMPL_FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
  214. {
  215. for (size_t i = 0; i < count; ++i)
  216. if (lhs[i] != rhs[i])
  217. return false;
  218. return lhs[count] == 0;
  219. }
  220. // Get length of wide string, even if CRT lacks wide character support
  221. PUGI_IMPL_FN size_t strlength_wide(const wchar_t* s)
  222. {
  223. assert(s);
  224. #ifdef PUGIXML_WCHAR_MODE
  225. return wcslen(s);
  226. #else
  227. const wchar_t* end = s;
  228. while (*end) end++;
  229. return static_cast<size_t>(end - s);
  230. #endif
  231. }
  232. PUGI_IMPL_NS_END
  233. // auto_ptr-like object for exception recovery
  234. PUGI_IMPL_NS_BEGIN
  // Minimal auto_ptr-like guard used for exception recovery: holds a raw pointer together with the
  // function that disposes of it, and calls that function from the destructor unless the pointer
  // is null or release() has been called.
  template <typename T> struct auto_deleter
  {
    typedef void (*D)(T*);

    T* data;   // guarded pointer; reset to null by release()
    D deleter; // invoked with 'data' on destruction when 'data' is non-null

    auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
    {
    }

    ~auto_deleter()
    {
      if (data) deleter(data);
    }

    // Gives up ownership: returns the guarded pointer and disarms the destructor.
    T* release()
    {
      T* result = data;
      data = 0;
      return result;
    }

  private:
    // A copy would make two guards dispose of the same pointer (double free), so copying is
    // disabled. Declared but intentionally never defined - the pre-C++11 spelling of '= delete'.
    auto_deleter(const auto_deleter&);
    auto_deleter& operator=(const auto_deleter&);
  };
  254. PUGI_IMPL_NS_END
  255. #ifdef PUGIXML_COMPACT
  256. PUGI_IMPL_NS_BEGIN
  257. class compact_hash_table
  258. {
  259. public:
  260. compact_hash_table(): _items(0), _capacity(0), _count(0)
  261. {
  262. }
  263. void clear()
  264. {
  265. if (_items)
  266. {
  267. xml_memory::deallocate(_items);
  268. _items = 0;
  269. _capacity = 0;
  270. _count = 0;
  271. }
  272. }
  273. void* find(const void* key)
  274. {
  275. if (_capacity == 0) return 0;
  276. item_t* item = get_item(key);
  277. assert(item);
  278. assert(item->key == key || (item->key == 0 && item->value == 0));
  279. return item->value;
  280. }
  281. void insert(const void* key, void* value)
  282. {
  283. assert(_capacity != 0 && _count < _capacity - _capacity / 4);
  284. item_t* item = get_item(key);
  285. assert(item);
  286. if (item->key == 0)
  287. {
  288. _count++;
  289. item->key = key;
  290. }
  291. item->value = value;
  292. }
  293. bool reserve(size_t extra = 16)
  294. {
  295. if (_count + extra >= _capacity - _capacity / 4)
  296. return rehash(_count + extra);
  297. return true;
  298. }
  299. private:
  300. struct item_t
  301. {
  302. const void* key;
  303. void* value;
  304. };
  305. item_t* _items;
  306. size_t _capacity;
  307. size_t _count;
  308. bool rehash(size_t count);
  309. item_t* get_item(const void* key)
  310. {
  311. assert(key);
  312. assert(_capacity > 0);
  313. size_t hashmod = _capacity - 1;
  314. size_t bucket = hash(key) & hashmod;
  315. for (size_t probe = 0; probe <= hashmod; ++probe)
  316. {
  317. item_t& probe_item = _items[bucket];
  318. if (probe_item.key == key || probe_item.key == 0)
  319. return &probe_item;
  320. // hash collision, quadratic probing
  321. bucket = (bucket + probe + 1) & hashmod;
  322. }
  323. assert(false && "Hash table is full"); // unreachable
  324. return 0;
  325. }
  326. static PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash(const void* key)
  327. {
  328. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
  329. // MurmurHash3 32-bit finalizer
  330. h ^= h >> 16;
  331. h *= 0x85ebca6bu;
  332. h ^= h >> 13;
  333. h *= 0xc2b2ae35u;
  334. h ^= h >> 16;
  335. return h;
  336. }
  337. };
  338. PUGI_IMPL_FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
  339. {
  340. size_t capacity = 32;
  341. while (count >= capacity - capacity / 4)
  342. capacity *= 2;
  343. compact_hash_table rt;
  344. rt._capacity = capacity;
  345. rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
  346. if (!rt._items)
  347. return false;
  348. memset(rt._items, 0, sizeof(item_t) * capacity);
  349. for (size_t i = 0; i < _capacity; ++i)
  350. if (_items[i].key)
  351. rt.insert(_items[i].key, _items[i].value);
  352. if (_items)
  353. xml_memory::deallocate(_items);
  354. _capacity = capacity;
  355. _items = rt._items;
  356. assert(_count == rt._count);
  357. return true;
  358. }
  359. PUGI_IMPL_NS_END
  360. #endif
  361. PUGI_IMPL_NS_BEGIN
  // Granularity of page allocations: fixed at 4 bytes in compact mode, pointer-sized otherwise.
  #ifdef PUGIXML_COMPACT
  static const uintptr_t xml_memory_block_alignment = 4;
  #else
  static const uintptr_t xml_memory_block_alignment = sizeof(void*);
  #endif

  // Flag bits kept in the low byte of a node/attribute header.
  static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
  static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
  static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
  static const uintptr_t xml_memory_page_type_mask = 0x0F;

  // Combined masks for string uniqueness.
  static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
  static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;

  // Header <-> page conversion.
  // Compact mode delegates to the header object; otherwise the header is an integer holding the
  // byte distance from the page start in bits 8 and up, with the flags in the low 8 bits.
  #ifdef PUGIXML_COMPACT
  #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) // unused
  #define PUGI_IMPL_GETPAGE_IMPL(header) (header).get_page()
  #else
  #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
  // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  #define PUGI_IMPL_GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
  #endif

  // Convenience accessors taking a node or attribute pointer.
  #define PUGI_IMPL_GETPAGE(n) PUGI_IMPL_GETPAGE_IMPL((n)->header)
  #define PUGI_IMPL_NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
  struct xml_allocator;

  // Header placed at the start of every memory page; the page payload follows it directly.
  // Pages form a doubly linked list through prev/next.
  struct xml_memory_page
  {
    // Treats 'memory' as a page header, zero-initializes every field and returns it.
    static xml_memory_page* construct(void* memory)
    {
      xml_memory_page* page = static_cast<xml_memory_page*>(memory);

      page->allocator = 0;

      page->prev = 0;
      page->next = 0;

      page->busy_size = 0;
      page->freed_size = 0;

    #ifdef PUGIXML_COMPACT
      page->compact_string_base = 0;
      page->compact_shared_parent = 0;
      page->compact_page_marker = 0;
    #endif

      return page;
    }

    xml_allocator* allocator; // allocator that owns this page

    xml_memory_page* prev;
    xml_memory_page* next;

    size_t busy_size;  // payload bytes handed out so far
    size_t freed_size; // payload bytes returned so far; the page is recycled when it equals busy_size

  #ifdef PUGIXML_COMPACT
    char_t* compact_string_base;   // base address that compact_string offsets are relative to
    void* compact_shared_parent;   // parent pointer shared by compact_pointer_parent fields on this page
    uint32_t* compact_page_marker; // most recent marker; markers store their distance from the page start
  #endif
  };
  414. static const size_t xml_memory_page_size =
  415. #ifdef PUGIXML_MEMORY_PAGE_SIZE
  416. (PUGIXML_MEMORY_PAGE_SIZE)
  417. #else
  418. 32768
  419. #endif
  420. - sizeof(xml_memory_page);
  // Bookkeeping record stored immediately before every string allocated from a page.
  // Both fields are expressed in units of xml_memory_block_alignment.
  struct xml_memory_string_header
  {
    // distance of this header from the start of the page payload
    uint16_t page_offset;

    // size of the whole allocation (header included); 0 if string occupies whole page
    uint16_t full_size;
  };
  426. struct xml_allocator
  427. {
  428. xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
  429. {
  430. #ifdef PUGIXML_COMPACT
  431. _hash = 0;
  432. #endif
  433. }
  434. xml_memory_page* allocate_page(size_t data_size)
  435. {
  436. size_t size = sizeof(xml_memory_page) + data_size;
  437. // allocate block with some alignment, leaving memory for worst-case padding
  438. void* memory = xml_memory::allocate(size);
  439. if (!memory) return 0;
  440. // prepare page structure
  441. xml_memory_page* page = xml_memory_page::construct(memory);
  442. assert(page);
  443. assert(this == _root->allocator);
  444. page->allocator = this;
  445. return page;
  446. }
  447. static void deallocate_page(xml_memory_page* page)
  448. {
  449. xml_memory::deallocate(page);
  450. }
  451. void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
  452. void* allocate_memory(size_t size, xml_memory_page*& out_page)
  453. {
  454. if (PUGI_IMPL_UNLIKELY(_busy_size + size > xml_memory_page_size))
  455. return allocate_memory_oob(size, out_page);
  456. void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
  457. _busy_size += size;
  458. out_page = _root;
  459. return buf;
  460. }
  461. #ifdef PUGIXML_COMPACT
  462. void* allocate_object(size_t size, xml_memory_page*& out_page)
  463. {
  464. void* result = allocate_memory(size + sizeof(uint32_t), out_page);
  465. if (!result) return 0;
  466. // adjust for marker
  467. ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
  468. if (PUGI_IMPL_UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
  469. {
  470. // insert new marker
  471. uint32_t* marker = static_cast<uint32_t*>(result);
  472. *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
  473. out_page->compact_page_marker = marker;
  474. // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
  475. // this will make sure deallocate_memory correctly tracks the size
  476. out_page->freed_size += sizeof(uint32_t);
  477. return marker + 1;
  478. }
  479. else
  480. {
  481. // roll back uint32_t part
  482. _busy_size -= sizeof(uint32_t);
  483. return result;
  484. }
  485. }
  486. #else
  487. void* allocate_object(size_t size, xml_memory_page*& out_page)
  488. {
  489. return allocate_memory(size, out_page);
  490. }
  491. #endif
  492. void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
  493. {
  494. if (page == _root) page->busy_size = _busy_size;
  495. assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
  496. (void)!ptr;
  497. page->freed_size += size;
  498. assert(page->freed_size <= page->busy_size);
  499. if (page->freed_size == page->busy_size)
  500. {
  501. if (page->next == 0)
  502. {
  503. assert(_root == page);
  504. // top page freed, just reset sizes
  505. page->busy_size = 0;
  506. page->freed_size = 0;
  507. #ifdef PUGIXML_COMPACT
  508. // reset compact state to maximize efficiency
  509. page->compact_string_base = 0;
  510. page->compact_shared_parent = 0;
  511. page->compact_page_marker = 0;
  512. #endif
  513. _busy_size = 0;
  514. }
  515. else
  516. {
  517. assert(_root != page);
  518. assert(page->prev);
  519. // remove from the list
  520. page->prev->next = page->next;
  521. page->next->prev = page->prev;
  522. // deallocate
  523. deallocate_page(page);
  524. }
  525. }
  526. }
  527. char_t* allocate_string(size_t length)
  528. {
  529. static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
  530. PUGI_IMPL_STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
  531. // allocate memory for string and header block
  532. size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
  533. // round size up to block alignment boundary
  534. size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
  535. xml_memory_page* page;
  536. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
  537. if (!header) return 0;
  538. // setup header
  539. ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
  540. assert(page_offset % xml_memory_block_alignment == 0);
  541. assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
  542. header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
  543. // full_size == 0 for large strings that occupy the whole page
  544. assert(full_size % xml_memory_block_alignment == 0);
  545. assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
  546. header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
  547. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  548. // header is guaranteed a pointer-sized alignment, which should be enough for char_t
  549. return static_cast<char_t*>(static_cast<void*>(header + 1));
  550. }
  551. void deallocate_string(char_t* string)
  552. {
  553. // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  554. // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
  555. // get header
  556. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
  557. assert(header);
  558. // deallocate
  559. size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
  560. xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
  561. // if full_size == 0 then this string occupies the whole page
  562. size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
  563. deallocate_memory(header, full_size, page);
  564. }
  565. bool reserve()
  566. {
  567. #ifdef PUGIXML_COMPACT
  568. return _hash->reserve();
  569. #else
  570. return true;
  571. #endif
  572. }
  573. xml_memory_page* _root;
  574. size_t _busy_size;
  575. #ifdef PUGIXML_COMPACT
  576. compact_hash_table* _hash;
  577. #endif
  578. };
  579. PUGI_IMPL_FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
  580. {
  581. const size_t large_allocation_threshold = xml_memory_page_size / 4;
  582. xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
  583. out_page = page;
  584. if (!page) return 0;
  585. if (size <= large_allocation_threshold)
  586. {
  587. _root->busy_size = _busy_size;
  588. // insert page at the end of linked list
  589. page->prev = _root;
  590. _root->next = page;
  591. _root = page;
  592. _busy_size = size;
  593. }
  594. else
  595. {
  596. // insert page before the end of linked list, so that it is deleted as soon as possible
  597. // the last page is not deleted even if it's empty (see deallocate_memory)
  598. assert(_root->prev);
  599. page->prev = _root->prev;
  600. page->next = _root;
  601. _root->prev->next = page;
  602. _root->prev = page;
  603. page->busy_size = size;
  604. }
  605. return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
  606. }
  607. PUGI_IMPL_NS_END
  608. #ifdef PUGIXML_COMPACT
  609. PUGI_IMPL_NS_BEGIN
  // Compact-mode objects are addressed in units of (1 << compact_alignment_log2) bytes.
  static const uintptr_t compact_alignment_log2 = 2;

  // The unit size in bytes, derived from the exponent above.
  static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
  612. class compact_header
  613. {
  614. public:
  615. compact_header(xml_memory_page* page, unsigned int flags)
  616. {
  617. PUGI_IMPL_STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
  618. ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
  619. assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
  620. _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
  621. _flags = static_cast<unsigned char>(flags);
  622. }
  623. void operator&=(uintptr_t mod)
  624. {
  625. _flags &= static_cast<unsigned char>(mod);
  626. }
  627. void operator|=(uintptr_t mod)
  628. {
  629. _flags |= static_cast<unsigned char>(mod);
  630. }
  631. uintptr_t operator&(uintptr_t mod) const
  632. {
  633. return _flags & mod;
  634. }
  635. xml_memory_page* get_page() const
  636. {
  637. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  638. const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
  639. const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
  640. return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
  641. }
  642. private:
  643. unsigned char _page;
  644. unsigned char _flags;
  645. };
  646. PUGI_IMPL_FN xml_memory_page* compact_get_page(const void* object, int header_offset)
  647. {
  648. const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
  649. return header->get_page();
  650. }
  651. template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE T* compact_get_value(const void* object)
  652. {
  653. return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
  654. }
  655. template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE void compact_set_value(const void* object, T* value)
  656. {
  657. compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
  658. }
  659. template <typename T, int header_offset, int start = -126> class compact_pointer
  660. {
  661. public:
  662. compact_pointer(): _data(0)
  663. {
  664. }
  665. void operator=(const compact_pointer& rhs)
  666. {
  667. *this = rhs + 0;
  668. }
  669. void operator=(T* value)
  670. {
  671. if (value)
  672. {
  673. // value is guaranteed to be compact-aligned; 'this' is not
  674. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  675. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  676. // compensate for arithmetic shift rounding for negative values
  677. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  678. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
  679. if (static_cast<uintptr_t>(offset) <= 253)
  680. _data = static_cast<unsigned char>(offset + 1);
  681. else
  682. {
  683. compact_set_value<header_offset>(this, value);
  684. _data = 255;
  685. }
  686. }
  687. else
  688. _data = 0;
  689. }
  690. operator T*() const
  691. {
  692. if (_data)
  693. {
  694. if (_data < 255)
  695. {
  696. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  697. return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
  698. }
  699. else
  700. return compact_get_value<header_offset, T>(this);
  701. }
  702. else
  703. return 0;
  704. }
  705. T* operator->() const
  706. {
  707. return *this;
  708. }
  709. private:
  710. unsigned char _data;
  711. };
  712. template <typename T, int header_offset> class compact_pointer_parent
  713. {
  714. public:
  715. compact_pointer_parent(): _data(0)
  716. {
  717. }
  718. void operator=(const compact_pointer_parent& rhs)
  719. {
  720. *this = rhs + 0;
  721. }
  722. void operator=(T* value)
  723. {
  724. if (value)
  725. {
  726. // value is guaranteed to be compact-aligned; 'this' is not
  727. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  728. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  729. // compensate for arithmetic shift behavior for negative values
  730. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  731. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
  732. if (static_cast<uintptr_t>(offset) <= 65533)
  733. {
  734. _data = static_cast<unsigned short>(offset + 1);
  735. }
  736. else
  737. {
  738. xml_memory_page* page = compact_get_page(this, header_offset);
  739. if (PUGI_IMPL_UNLIKELY(page->compact_shared_parent == 0))
  740. page->compact_shared_parent = value;
  741. if (page->compact_shared_parent == value)
  742. {
  743. _data = 65534;
  744. }
  745. else
  746. {
  747. compact_set_value<header_offset>(this, value);
  748. _data = 65535;
  749. }
  750. }
  751. }
  752. else
  753. {
  754. _data = 0;
  755. }
  756. }
  757. operator T*() const
  758. {
  759. if (_data)
  760. {
  761. if (_data < 65534)
  762. {
  763. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  764. return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
  765. }
  766. else if (_data == 65534)
  767. return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
  768. else
  769. return compact_get_value<header_offset, T>(this);
  770. }
  771. else
  772. return 0;
  773. }
  774. T* operator->() const
  775. {
  776. return *this;
  777. }
  778. private:
  779. uint16_t _data;
  780. };
  781. template <int header_offset, int base_offset> class compact_string
  782. {
  783. public:
  784. compact_string(): _data(0)
  785. {
  786. }
  787. void operator=(const compact_string& rhs)
  788. {
  789. *this = rhs + 0;
  790. }
  791. void operator=(char_t* value)
  792. {
  793. if (value)
  794. {
  795. xml_memory_page* page = compact_get_page(this, header_offset);
  796. if (PUGI_IMPL_UNLIKELY(page->compact_string_base == 0))
  797. page->compact_string_base = value;
  798. ptrdiff_t offset = value - page->compact_string_base;
  799. if (static_cast<uintptr_t>(offset) < (65535 << 7))
  800. {
  801. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  802. uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
  803. if (*base == 0)
  804. {
  805. *base = static_cast<uint16_t>((offset >> 7) + 1);
  806. _data = static_cast<unsigned char>((offset & 127) + 1);
  807. }
  808. else
  809. {
  810. ptrdiff_t remainder = offset - ((*base - 1) << 7);
  811. if (static_cast<uintptr_t>(remainder) <= 253)
  812. {
  813. _data = static_cast<unsigned char>(remainder + 1);
  814. }
  815. else
  816. {
  817. compact_set_value<header_offset>(this, value);
  818. _data = 255;
  819. }
  820. }
  821. }
  822. else
  823. {
  824. compact_set_value<header_offset>(this, value);
  825. _data = 255;
  826. }
  827. }
  828. else
  829. {
  830. _data = 0;
  831. }
  832. }
  833. operator char_t*() const
  834. {
  835. if (_data)
  836. {
  837. if (_data < 255)
  838. {
  839. xml_memory_page* page = compact_get_page(this, header_offset);
  840. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  841. const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
  842. assert(*base);
  843. ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
  844. return page->compact_string_base + offset;
  845. }
  846. else
  847. {
  848. return compact_get_value<header_offset, char_t>(this);
  849. }
  850. }
  851. else
  852. return 0;
  853. }
  854. private:
  855. unsigned char _data;
  856. };
  857. PUGI_IMPL_NS_END
  858. #endif
  859. #ifdef PUGIXML_COMPACT
  860. namespace pugi
  861. {
  // Attribute record used when PUGIXML_COMPACT is defined.
  // The constructor statically asserts that the whole record occupies exactly 8 bytes, so the member order matters.
  struct xml_attribute_struct
  {
      xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
      {
          PUGI_IMPL_STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
      }

      impl::compact_header header;

      // 16-bit value initialized to 0 here
      // NOTE(review): looks like the shared base read by the name/value compact strings below - confirm
      uint16_t namevalue_base;

      // NOTE(review): the integer template arguments appear to be byte offsets (distance of the member from the
      // start of the record, and for strings the distance back to namevalue_base) - confirm against the templates
      impl::compact_string<4, 2> name;
      impl::compact_string<5, 3> value;
      impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
      impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
  };
  // Node record used when PUGIXML_COMPACT is defined.
  // The constructor statically asserts that the whole record occupies exactly 12 bytes, so the member order matters.
  struct xml_node_struct
  {
      xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
      {
          PUGI_IMPL_STATIC_ASSERT(sizeof(xml_node_struct) == 12);
      }

      impl::compact_header header;

      // 16-bit value initialized to 0 here
      // NOTE(review): looks like the shared base read by the name/value compact strings below - confirm
      uint16_t namevalue_base;

      // NOTE(review): the integer template arguments appear to be byte offsets of each member within the record - confirm
      impl::compact_string<4, 2> name;
      impl::compact_string<5, 3> value;
      impl::compact_pointer_parent<xml_node_struct, 6> parent;
      impl::compact_pointer<xml_node_struct, 8, 0> first_child;
      impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
      impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
      impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
  };
  891. }
  892. #else
  893. namespace pugi
  894. {
  // Attribute record used when PUGIXML_COMPACT is not defined; all links are plain pointers
  struct xml_attribute_struct
  {
      // Starts with null strings and links; the header is computed from this object, its page and flags 0
      xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
      {
          header = PUGI_IMPL_GETHEADER_IMPL(this, page, 0);
      }

      // value produced by PUGI_IMPL_GETHEADER_IMPL; destroy_attribute tests the name/value allocated masks against it
      uintptr_t header;

      char_t* name;
      char_t* value;

      // previous attribute; append_attribute stores the last attribute here for the first element of the list
      xml_attribute_struct* prev_attribute_c;

      // next attribute, null for the last one
      xml_attribute_struct* next_attribute;
  };
  // Node record used when PUGIXML_COMPACT is not defined; all links are plain pointers
  struct xml_node_struct
  {
      // Starts with null strings and links; the header is computed from this object, its page and the node type
      xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
      {
          header = PUGI_IMPL_GETHEADER_IMPL(this, page, type);
      }

      // value produced by PUGI_IMPL_GETHEADER_IMPL; destroy_node tests the name/value allocated masks against it
      uintptr_t header;

      char_t* name;
      char_t* value;

      xml_node_struct* parent;

      xml_node_struct* first_child;

      // previous sibling; append_node stores the last child here for the first child of a parent
      xml_node_struct* prev_sibling_c;

      // next sibling, null for the last child
      xml_node_struct* next_sibling;

      xml_attribute_struct* first_attribute;
  };
  922. }
  923. #endif
  924. PUGI_IMPL_NS_BEGIN
  // Singly linked list entry holding one character buffer pointer
  struct xml_extra_buffer
  {
      char_t* buffer;

      // following entry in the list
      xml_extra_buffer* next;
  };
  // Document root record: a node of type node_document that is also the xml_allocator for its tree.
  // get_document relies on this by casting a page's allocator pointer back to xml_document_struct.
  struct xml_document_struct: public xml_node_struct, public xml_allocator
  {
      // Both bases are built from the same page; buffer pointers start out null
      xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
      {
      }

      const char_t* buffer;

      // head of the xml_extra_buffer list
      xml_extra_buffer* extra_buffers;

  #ifdef PUGIXML_COMPACT
      // only present in compact builds
      compact_hash_table hash;
  #endif
  };
  941. template <typename Object> inline xml_allocator& get_allocator(const Object* object)
  942. {
  943. assert(object);
  944. return *PUGI_IMPL_GETPAGE(object)->allocator;
  945. }
  946. template <typename Object> inline xml_document_struct& get_document(const Object* object)
  947. {
  948. assert(object);
  949. return *static_cast<xml_document_struct*>(PUGI_IMPL_GETPAGE(object)->allocator);
  950. }
  951. PUGI_IMPL_NS_END
  952. // Low-level DOM operations
  953. PUGI_IMPL_NS_BEGIN
  954. inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
  955. {
  956. xml_memory_page* page;
  957. void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
  958. if (!memory) return 0;
  959. return new (memory) xml_attribute_struct(page);
  960. }
  961. inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
  962. {
  963. xml_memory_page* page;
  964. void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
  965. if (!memory) return 0;
  966. return new (memory) xml_node_struct(page, type);
  967. }
  968. inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
  969. {
  970. if (a->header & impl::xml_memory_page_name_allocated_mask)
  971. alloc.deallocate_string(a->name);
  972. if (a->header & impl::xml_memory_page_value_allocated_mask)
  973. alloc.deallocate_string(a->value);
  974. alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI_IMPL_GETPAGE(a));
  975. }
  976. inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
  977. {
  978. if (n->header & impl::xml_memory_page_name_allocated_mask)
  979. alloc.deallocate_string(n->name);
  980. if (n->header & impl::xml_memory_page_value_allocated_mask)
  981. alloc.deallocate_string(n->value);
  982. for (xml_attribute_struct* attr = n->first_attribute; attr; )
  983. {
  984. xml_attribute_struct* next = attr->next_attribute;
  985. destroy_attribute(attr, alloc);
  986. attr = next;
  987. }
  988. for (xml_node_struct* child = n->first_child; child; )
  989. {
  990. xml_node_struct* next = child->next_sibling;
  991. destroy_node(child, alloc);
  992. child = next;
  993. }
  994. alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI_IMPL_GETPAGE(n));
  995. }
  996. inline void append_node(xml_node_struct* child, xml_node_struct* node)
  997. {
  998. child->parent = node;
  999. xml_node_struct* head = node->first_child;
  1000. if (head)
  1001. {
  1002. xml_node_struct* tail = head->prev_sibling_c;
  1003. tail->next_sibling = child;
  1004. child->prev_sibling_c = tail;
  1005. head->prev_sibling_c = child;
  1006. }
  1007. else
  1008. {
  1009. node->first_child = child;
  1010. child->prev_sibling_c = child;
  1011. }
  1012. }
  1013. inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
  1014. {
  1015. child->parent = node;
  1016. xml_node_struct* head = node->first_child;
  1017. if (head)
  1018. {
  1019. child->prev_sibling_c = head->prev_sibling_c;
  1020. head->prev_sibling_c = child;
  1021. }
  1022. else
  1023. child->prev_sibling_c = child;
  1024. child->next_sibling = head;
  1025. node->first_child = child;
  1026. }
  1027. inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
  1028. {
  1029. xml_node_struct* parent = node->parent;
  1030. child->parent = parent;
  1031. xml_node_struct* next = node->next_sibling;
  1032. if (next)
  1033. next->prev_sibling_c = child;
  1034. else
  1035. parent->first_child->prev_sibling_c = child;
  1036. child->next_sibling = next;
  1037. child->prev_sibling_c = node;
  1038. node->next_sibling = child;
  1039. }
  1040. inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
  1041. {
  1042. xml_node_struct* parent = node->parent;
  1043. child->parent = parent;
  1044. xml_node_struct* prev = node->prev_sibling_c;
  1045. if (prev->next_sibling)
  1046. prev->next_sibling = child;
  1047. else
  1048. parent->first_child = child;
  1049. child->prev_sibling_c = prev;
  1050. child->next_sibling = node;
  1051. node->prev_sibling_c = child;
  1052. }
  1053. inline void remove_node(xml_node_struct* node)
  1054. {
  1055. xml_node_struct* parent = node->parent;
  1056. xml_node_struct* next = node->next_sibling;
  1057. xml_node_struct* prev = node->prev_sibling_c;
  1058. if (next)
  1059. next->prev_sibling_c = prev;
  1060. else
  1061. parent->first_child->prev_sibling_c = prev;
  1062. if (prev->next_sibling)
  1063. prev->next_sibling = next;
  1064. else
  1065. parent->first_child = next;
  1066. node->parent = 0;
  1067. node->prev_sibling_c = 0;
  1068. node->next_sibling = 0;
  1069. }
  1070. inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1071. {
  1072. xml_attribute_struct* head = node->first_attribute;
  1073. if (head)
  1074. {
  1075. xml_attribute_struct* tail = head->prev_attribute_c;
  1076. tail->next_attribute = attr;
  1077. attr->prev_attribute_c = tail;
  1078. head->prev_attribute_c = attr;
  1079. }
  1080. else
  1081. {
  1082. node->first_attribute = attr;
  1083. attr->prev_attribute_c = attr;
  1084. }
  1085. }
  1086. inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1087. {
  1088. xml_attribute_struct* head = node->first_attribute;
  1089. if (head)
  1090. {
  1091. attr->prev_attribute_c = head->prev_attribute_c;
  1092. head->prev_attribute_c = attr;
  1093. }
  1094. else
  1095. attr->prev_attribute_c = attr;
  1096. attr->next_attribute = head;
  1097. node->first_attribute = attr;
  1098. }
  1099. inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1100. {
  1101. xml_attribute_struct* next = place->next_attribute;
  1102. if (next)
  1103. next->prev_attribute_c = attr;
  1104. else
  1105. node->first_attribute->prev_attribute_c = attr;
  1106. attr->next_attribute = next;
  1107. attr->prev_attribute_c = place;
  1108. place->next_attribute = attr;
  1109. }
  1110. inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1111. {
  1112. xml_attribute_struct* prev = place->prev_attribute_c;
  1113. if (prev->next_attribute)
  1114. prev->next_attribute = attr;
  1115. else
  1116. node->first_attribute = attr;
  1117. attr->prev_attribute_c = prev;
  1118. attr->next_attribute = place;
  1119. place->prev_attribute_c = attr;
  1120. }
  1121. inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1122. {
  1123. xml_attribute_struct* next = attr->next_attribute;
  1124. xml_attribute_struct* prev = attr->prev_attribute_c;
  1125. if (next)
  1126. next->prev_attribute_c = prev;
  1127. else
  1128. node->first_attribute->prev_attribute_c = prev;
  1129. if (prev->next_attribute)
  1130. prev->next_attribute = next;
  1131. else
  1132. node->first_attribute = next;
  1133. attr->prev_attribute_c = 0;
  1134. attr->next_attribute = 0;
  1135. }
  1136. PUGI_IMPL_FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
  1137. {
  1138. if (!alloc.reserve()) return 0;
  1139. xml_node_struct* child = allocate_node(alloc, type);
  1140. if (!child) return 0;
  1141. append_node(child, node);
  1142. return child;
  1143. }
  1144. PUGI_IMPL_FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
  1145. {
  1146. if (!alloc.reserve()) return 0;
  1147. xml_attribute_struct* attr = allocate_attribute(alloc);
  1148. if (!attr) return 0;
  1149. append_attribute(attr, node);
  1150. return attr;
  1151. }
  1152. PUGI_IMPL_NS_END
  1153. // Helper classes for code generation
  1154. PUGI_IMPL_NS_BEGIN
  // Compile-time 'false' tag: value is 0
  struct opt_false { enum { value = 0 }; };
  // Compile-time 'true' tag: value is 1
  struct opt_true { enum { value = 1 }; };
  1163. PUGI_IMPL_NS_END
  1164. // Unicode utilities
  1165. PUGI_IMPL_NS_BEGIN
  // Exchanges the two bytes of a 16-bit value
  inline uint16_t endian_swap(uint16_t value)
  {
      const unsigned int low_byte = value & 0xff;
      const unsigned int high_byte = value >> 8;

      return static_cast<uint16_t>((low_byte << 8) | high_byte);
  }
  // Reverses the order of the four bytes of a 32-bit value
  inline uint32_t endian_swap(uint32_t value)
  {
      const uint32_t byte0_to_top = (value & 0xff) << 24;
      const uint32_t byte1_up = (value & 0xff00) << 8;
      const uint32_t byte2_down = (value & 0xff0000) >> 8;
      const uint32_t byte3_to_bottom = value >> 24;

      return byte0_to_top | byte1_up | byte2_down | byte3_to_bottom;
  }
  // Traits that add up how many UTF-8 bytes the encoded code points would occupy
  struct utf8_counter
  {
      typedef size_t value_type;

      // Code points below 0x10000: 1 byte up to U+007F, 2 bytes up to U+07FF, 3 bytes otherwise
      static value_type low(value_type result, uint32_t ch)
      {
          size_t bytes = 3;

          if (ch < 0x80)
              bytes = 1;
          else if (ch < 0x800)
              bytes = 2;

          return result + bytes;
      }

      // U+10000..U+10FFFF always take 4 bytes
      static value_type high(value_type result, uint32_t)
      {
          return result + 4;
      }
  };
  // Traits that encode code points as UTF-8 into a byte buffer; every function returns the advanced write position
  struct utf8_writer
  {
      typedef uint8_t* value_type;

      // Code points below 0x10000
      static value_type low(value_type result, uint32_t ch)
      {
          // U+0800..U+FFFF
          if (ch >= 0x800)
          {
              result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
              result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
              result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
              return result + 3;
          }

          // U+0080..U+07FF
          if (ch >= 0x80)
          {
              result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
              result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
              return result + 2;
          }

          // U+0000..U+007F
          *result = static_cast<uint8_t>(ch);
          return result + 1;
      }

      // U+10000..U+10FFFF
      static value_type high(value_type result, uint32_t ch)
      {
          result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
          result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
          result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
          result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
          return result + 4;
      }

      // Picks low() or high() based on the code point
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);

          return high(result, ch);
      }
  };
  // Traits that add up how many 16-bit units the encoded code points would occupy
  struct utf16_counter
  {
      typedef size_t value_type;

      // one unit for code points reported through low()
      static value_type low(value_type result, uint32_t)
      {
          const size_t units = 1;
          return result + units;
      }

      // two units (a surrogate pair) for code points reported through high()
      static value_type high(value_type result, uint32_t)
      {
          const size_t units = 2;
          return result + units;
      }
  };
  // Traits that encode code points as UTF-16 units; every function returns the advanced write position
  struct utf16_writer
  {
      typedef uint16_t* value_type;

      // Stores ch as a single unit
      static value_type low(value_type result, uint32_t ch)
      {
          result[0] = static_cast<uint16_t>(ch);
          return result + 1;
      }

      // Stores ch as a surrogate pair: upper 10 bits of (ch - 0x10000) on 0xD800, lower 10 bits on 0xDC00
      static value_type high(value_type result, uint32_t ch)
      {
          const uint32_t shifted = static_cast<uint32_t>(ch - 0x10000);

          result[0] = static_cast<uint16_t>(0xD800 + (shifted >> 10));
          result[1] = static_cast<uint16_t>(0xDC00 + (shifted & 0x3ff));
          return result + 2;
      }

      // Picks low() or high() based on the code point
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);

          return high(result, ch);
      }
  };
  // Traits that count code points: every code point is exactly one 32-bit unit
  struct utf32_counter
  {
      typedef size_t value_type;

      static value_type low(value_type result, uint32_t)
      {
          const size_t units = 1;
          return result + units;
      }

      static value_type high(value_type result, uint32_t)
      {
          const size_t units = 1;
          return result + units;
      }
  };
  // Traits that store code points unchanged as 32-bit units; every function returns the advanced write position
  struct utf32_writer
  {
      typedef uint32_t* value_type;

      static value_type low(value_type result, uint32_t ch)
      {
          return any(result, ch);
      }

      static value_type high(value_type result, uint32_t ch)
      {
          return any(result, ch);
      }

      // Stores ch verbatim regardless of its range
      static value_type any(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }
  };
  // Traits that write one byte per code point; anything above 255 is replaced by '?'
  struct latin1_writer
  {
      typedef uint8_t* value_type;

      static value_type low(value_type result, uint32_t ch)
      {
          if (ch > 255)
              ch = '?';

          result[0] = static_cast<uint8_t>(ch);
          return result + 1;
      }

      // Code points reported through high() never fit, so the replacement is written unconditionally
      static value_type high(value_type result, uint32_t ch)
      {
          (void)ch;

          result[0] = '?';
          return result + 1;
      }
  };
  // Decodes a UTF-8 byte sequence and reports every code point to Traits:
  // Traits::low for code points decoded from 1-3 byte sequences, Traits::high for 4-byte sequences.
  // Bytes that do not start a complete, well-formed sequence are skipped one at a time.
  struct utf8_decoder
  {
      typedef uint8_t type;

      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          const uint8_t payload_mask = 0x3f;

          while (size)
          {
              const uint8_t lead = *data;

              // 0xxxxxxx -> U+0000..U+007F
              if (lead < 0x80)
              {
                  result = Traits::low(result, lead);
                  data += 1;
                  size -= 1;

                  // once the read position is 4-byte aligned, consume ascii in blocks of four
                  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
                  {
                      // round-trip through void* to silence 'cast increases required alignment of target type' warnings
                      while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
                      {
                          result = Traits::low(result, data[0]);
                          result = Traits::low(result, data[1]);
                          result = Traits::low(result, data[2]);
                          result = Traits::low(result, data[3]);
                          data += 4;
                          size -= 4;
                      }
                  }

                  continue;
              }

              // 110xxxxx 10xxxxxx -> U+0080..U+07FF
              if ((lead & 0xE0) == 0xC0 && size >= 2 && (data[1] & 0xc0) == 0x80)
              {
                  result = Traits::low(result, ((lead & 0x3F) << 6) | (data[1] & payload_mask));
                  data += 2;
                  size -= 2;
                  continue;
              }

              // 1110xxxx 10xxxxxx 10xxxxxx -> U+0800..U+FFFF
              if ((lead & 0xF0) == 0xE0 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
              {
                  result = Traits::low(result, ((lead & 0x1F) << 12) | ((data[1] & payload_mask) << 6) | (data[2] & payload_mask));
                  data += 3;
                  size -= 3;
                  continue;
              }

              // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> U+10000..U+10FFFF
              if ((lead & 0xF8) == 0xF0 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
              {
                  result = Traits::high(result, ((lead & 0x0F) << 18) | ((data[1] & payload_mask) << 12) | ((data[2] & payload_mask) << 6) | (data[3] & payload_mask));
                  data += 4;
                  size -= 4;
                  continue;
              }

              // stray continuation byte, 11111xxx, or a truncated/malformed sequence: drop one byte
              data += 1;
              size -= 1;
          }

          return result;
      }
  };
  1373. template <typename opt_swap> struct utf16_decoder
  1374. {
  1375. typedef uint16_t type;
  1376. template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
  1377. {
  1378. while (size)
  1379. {
  1380. uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1381. // U+0000..U+D7FF
  1382. if (lead < 0xD800)
  1383. {
  1384. result = Traits::low(result, lead);
  1385. data += 1;
  1386. size -= 1;
  1387. }
  1388. // U+E000..U+FFFF
  1389. else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
  1390. {
  1391. result = Traits::low(result, lead);
  1392. data += 1;
  1393. size -= 1;
  1394. }
  1395. // surrogate pair lead
  1396. else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
  1397. {
  1398. uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
  1399. if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
  1400. {
  1401. result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
  1402. data += 2;
  1403. size -= 2;
  1404. }
  1405. else
  1406. {
  1407. data += 1;
  1408. size -= 1;
  1409. }
  1410. }
  1411. else
  1412. {
  1413. data += 1;
  1414. size -= 1;
  1415. }
  1416. }
  1417. return result;
  1418. }
  1419. };
  1420. template <typename opt_swap> struct utf32_decoder
  1421. {
  1422. typedef uint32_t type;
  1423. template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
  1424. {
  1425. while (size)
  1426. {
  1427. uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1428. // U+0000..U+FFFF
  1429. if (lead < 0x10000)
  1430. {
  1431. result = Traits::low(result, lead);
  1432. data += 1;
  1433. size -= 1;
  1434. }
  1435. // U+10000..U+10FFFF
  1436. else
  1437. {
  1438. result = Traits::high(result, lead);
  1439. data += 1;
  1440. size -= 1;
  1441. }
  1442. }
  1443. return result;
  1444. }
  1445. };
  // Reports every input byte to Traits::low as a code point of the same value
  struct latin1_decoder
  {
      typedef uint8_t type;

      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          for (size_t i = 0; i < size; ++i)
              result = Traits::low(result, data[i]);

          return result;
      }
  };
  // Maps the size of wchar_t in bytes to the matching fixed-width unit type and UTF helper types.
  // Only sizes 2 and 4 are specialized; any other size leaves the primary template undefined.
  template <size_t size> struct wchar_selector;

  // 2-byte wchar_t: handled with the UTF-16 helpers, decoding without byte swapping
  template <> struct wchar_selector<2>
  {
      typedef uint16_t type;
      typedef utf16_counter counter;
      typedef utf16_writer writer;
      typedef utf16_decoder<opt_false> decoder;
  };

  // 4-byte wchar_t: handled with the UTF-32 helpers, decoding without byte swapping
  template <> struct wchar_selector<4>
  {
      typedef uint32_t type;
      typedef utf32_counter counter;
      typedef utf32_writer writer;
      typedef utf32_decoder<opt_false> decoder;
  };

  // Counter and writer selected for this platform's wchar_t
  typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
  typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
  1477. struct wchar_decoder
  1478. {
  1479. typedef wchar_t type;
  1480. template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
  1481. {
  1482. typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
  1483. return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
  1484. }
  1485. };
  1486. #ifdef PUGIXML_WCHAR_MODE
  1487. PUGI_IMPL_FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
  1488. {
  1489. for (size_t i = 0; i < length; ++i)
  1490. result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
  1491. }
  1492. #endif
  1493. PUGI_IMPL_NS_END
  1494. PUGI_IMPL_NS_BEGIN
  // Bit flags stored per character in chartype_table
  enum chartype_t
  {
      ct_parse_pcdata = 1 << 0,   // \0, &, \r, <
      ct_parse_attr = 1 << 1,     // \0, &, \r, ', "
      ct_parse_attr_ws = 1 << 2,  // \0, &, \r, ', ", \n, tab
      ct_space = 1 << 3,          // \r, \n, space, tab
      ct_parse_cdata = 1 << 4,    // \0, ], >, \r
      ct_parse_comment = 1 << 5,  // \0, -, >, \r
      ct_symbol = 1 << 6,         // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
      ct_start_symbol = 1 << 7    // Any symbol > 127, a-z, A-Z, _, :
  };
  // chartype_t flag combination for every 8-bit character value, 16 entries per row
  static const unsigned char chartype_table[256] =
  {
      // 0-15: \0, tab (9), \n (10), \r (13)
      55,  0,   0,   0,   0,   0,   0,   0,   0,   12,  12,  0,   0,   63,  0,   0,
      // 16-31
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
      // 32-47: space, " (34), & (38), ' (39), - (45), . (46)
      8,   0,   6,   0,   0,   0,   7,   6,   0,   0,   0,   0,   0,   96,  64,  0,
      // 48-63: 0-9, : (58), < (60), > (62)
      64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  192, 0,   1,   0,   48,  0,
      // 64-79: A-O
      0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      // 80-95: P-Z, ] (93), _ (95)
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   16,  0,   192,
      // 96-111: a-o
      0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      // 112-127: p-z
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   0,   0,   0,

      // 128-255: every value counts as both symbol and start symbol
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
  };
  // Bit flags stored per character in chartypex_table
  enum chartypex_t
  {
      ctx_special_pcdata = 1 << 0,  // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
      ctx_special_attr = 1 << 1,    // Any symbol >= 0 and < 32, &, <, ", '
      ctx_start_symbol = 1 << 2,    // Any symbol > 127, a-z, A-Z, _
      ctx_digit = 1 << 3,           // 0-9
      ctx_symbol = 1 << 4           // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
  };
  // chartypex_t flag combination for every 8-bit character value, 16 entries per row
  static const unsigned char chartypex_table[256] =
  {
      // 0-15: tab (9), \n (10) and \r (13) are special only in attributes
      3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  3,  3,  2,  3,  3,
      // 16-31
      3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
      // 32-47: " (34), & (38), ' (39), - (45), . (46)
      0,  0,  2,  0,  0,  0,  3,  2,  0,  0,  0,  0,  0,  16, 16, 0,
      // 48-63: 0-9, < (60), > (62)
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  1,  0,
      // 64-79: A-O
      0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      // 80-95: P-Z, _ (95)
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
      // 96-111: a-o
      0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      // 112-127: p-z
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

      // 128-255: every value counts as both symbol and start symbol
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
  };
  // Character classification helpers: look up character c in a flag table and mask the entry with ct.
  // The result is non-zero when c has at least one of the requested flags.
  #ifdef PUGIXML_WCHAR_MODE
  // wide characters: values below 128 index the table directly, everything else shares the entry at index 128
  #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
  #else
  // narrow characters: the cast to unsigned char keeps the index within 0..255 even if char is signed
  #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
  #endif
  // Lookup against chartype_table (chartype_t flags)
  #define PUGI_IMPL_IS_CHARTYPE(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartype_table)
  // Lookup against chartypex_table (chartypex_t flags)
  #define PUGI_IMPL_IS_CHARTYPEX(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartypex_table)
  1559. PUGI_IMPL_FN bool is_little_endian()
  1560. {
  1561. unsigned int ui = 1;
  1562. return *reinterpret_cast<unsigned char*>(&ui) == 1;
  1563. }
  1564. PUGI_IMPL_FN xml_encoding get_wchar_encoding()
  1565. {
  1566. PUGI_IMPL_STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
  1567. if (sizeof(wchar_t) == 2)
  1568. return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1569. else
  1570. return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1571. }
  // Locates the value of the 'encoding' field inside an XML declaration at the start of a byte buffer.
  // data, size: the bytes to inspect.
  // out_encoding, out_length: receive the start and length of the encoding name (characters of class ct_symbol).
  //   out_encoding is written before the closing delimiter has been verified, so it can be modified even when the
  //   function goes on to return false.
  // Returns true only if the buffer starts with "<?xml" followed by whitespace and a complete, quoted encoding value
  // is found before any '?' character.
  PUGI_IMPL_FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
  {
      // Both helpers operate on the local variable 'offset'.
      // SCANCHAR consumes exactly ch, or makes the enclosing function return false on mismatch / end of data.
      // SCANCHARTYPE consumes zero or more characters of class ct and never fails.
      #define PUGI_IMPL_SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
      #define PUGI_IMPL_SCANCHARTYPE(ct) { while (offset < size && PUGI_IMPL_IS_CHARTYPE(data[offset], ct)) offset++; }

      // check if we have a non-empty XML declaration
      // (the size check comes first so that data[0..5] are only read when they exist)
      if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI_IMPL_IS_CHARTYPE(data[5], ct_space)))
          return false;

      // scan XML declaration until the encoding field
      // (the bound i + 1 < size keeps the data[i + 1] lookahead below in range)
      for (size_t i = 6; i + 1 < size; ++i)
      {
          // declaration can not contain ? in quoted values
          if (data[i] == '?')
              return false;

          if (data[i] == 'e' && data[i + 1] == 'n')
          {
              size_t offset = i;

              // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
              PUGI_IMPL_SCANCHAR('e'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('c'); PUGI_IMPL_SCANCHAR('o');
              PUGI_IMPL_SCANCHAR('d'); PUGI_IMPL_SCANCHAR('i'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('g');

              // S? = S?
              PUGI_IMPL_SCANCHARTYPE(ct_space);
              PUGI_IMPL_SCANCHAR('=');
              PUGI_IMPL_SCANCHARTYPE(ct_space);

              // the only two valid delimiters are ' and "
              // (anything other than " is treated as ' here, so the SCANCHAR below rejects invalid delimiters)
              uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';

              PUGI_IMPL_SCANCHAR(delimiter);

              size_t start = offset;

              out_encoding = data + offset;

              PUGI_IMPL_SCANCHARTYPE(ct_symbol);

              out_length = offset - start;

              // the value must be terminated by the same delimiter that opened it
              PUGI_IMPL_SCANCHAR(delimiter);

              return true;
          }
      }

      return false;

      #undef PUGI_IMPL_SCANCHAR
      #undef PUGI_IMPL_SCANCHARTYPE
  }
  1610. PUGI_IMPL_FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
  1611. {
  1612. // skip encoding autodetection if input buffer is too small
  1613. if (size < 4) return encoding_utf8;
  1614. uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
  1615. // look for BOM in first few bytes
  1616. if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
  1617. if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1618. if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
  1619. if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
  1620. if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
  1621. // look for <, <? or <?xm in various encodings
  1622. if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
  1623. if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1624. if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
  1625. if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
  1626. // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
  1627. if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
  1628. if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
  1629. // no known BOM detected; parse declaration
  1630. const uint8_t* enc = 0;
  1631. size_t enc_length = 0;
  1632. if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
  1633. {
  1634. // iso-8859-1 (case-insensitive)
  1635. if (enc_length == 10
  1636. && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
  1637. && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
  1638. && enc[8] == '-' && enc[9] == '1')
  1639. return encoding_latin1;
  1640. // latin1 (case-insensitive)
  1641. if (enc_length == 6
  1642. && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
  1643. && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
  1644. && enc[5] == '1')
  1645. return encoding_latin1;
  1646. }
  1647. return encoding_utf8;
  1648. }
  1649. PUGI_IMPL_FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
  1650. {
  1651. // replace wchar encoding with utf implementation
  1652. if (encoding == encoding_wchar) return get_wchar_encoding();
  1653. // replace utf16 encoding with utf16 with specific endianness
  1654. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1655. // replace utf32 encoding with utf32 with specific endianness
  1656. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1657. // only do autodetection if no explicit encoding is requested
  1658. if (encoding != encoding_auto) return encoding;
  1659. // try to guess encoding (based on XML specification, Appendix F.1)
  1660. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1661. return guess_buffer_encoding(data, size);
  1662. }
  1663. PUGI_IMPL_FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1664. {
  1665. size_t length = size / sizeof(char_t);
  1666. if (is_mutable)
  1667. {
  1668. out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
  1669. out_length = length;
  1670. }
  1671. else
  1672. {
  1673. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1674. if (!buffer) return false;
  1675. if (contents)
  1676. memcpy(buffer, contents, length * sizeof(char_t));
  1677. else
  1678. assert(length == 0);
  1679. buffer[length] = 0;
  1680. out_buffer = buffer;
  1681. out_length = length + 1;
  1682. }
  1683. return true;
  1684. }
  1685. #ifdef PUGIXML_WCHAR_MODE
  1686. PUGI_IMPL_FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
  1687. {
  1688. return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
  1689. (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
  1690. }
  1691. PUGI_IMPL_FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1692. {
  1693. const char_t* data = static_cast<const char_t*>(contents);
  1694. size_t length = size / sizeof(char_t);
  1695. if (is_mutable)
  1696. {
  1697. char_t* buffer = const_cast<char_t*>(data);
  1698. convert_wchar_endian_swap(buffer, data, length);
  1699. out_buffer = buffer;
  1700. out_length = length;
  1701. }
  1702. else
  1703. {
  1704. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1705. if (!buffer) return false;
  1706. convert_wchar_endian_swap(buffer, data, length);
  1707. buffer[length] = 0;
  1708. out_buffer = buffer;
  1709. out_length = length + 1;
  1710. }
  1711. return true;
  1712. }
  1713. template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1714. {
  1715. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1716. size_t data_length = size / sizeof(typename D::type);
  1717. // first pass: get length in wchar_t units
  1718. size_t length = D::process(data, data_length, 0, wchar_counter());
  1719. // allocate buffer of suitable length
  1720. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1721. if (!buffer) return false;
  1722. // second pass: convert utf16 input to wchar_t
  1723. wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
  1724. wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
  1725. assert(oend == obegin + length);
  1726. *oend = 0;
  1727. out_buffer = buffer;
  1728. out_length = length + 1;
  1729. return true;
  1730. }
  1731. PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1732. {
  1733. // get native encoding
  1734. xml_encoding wchar_encoding = get_wchar_encoding();
  1735. // fast path: no conversion required
  1736. if (encoding == wchar_encoding)
  1737. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1738. // only endian-swapping is required
  1739. if (need_endian_swap_utf(encoding, wchar_encoding))
  1740. return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
  1741. // source encoding is utf8
  1742. if (encoding == encoding_utf8)
  1743. return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
  1744. // source encoding is utf16
  1745. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1746. {
  1747. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1748. return (native_encoding == encoding) ?
  1749. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1750. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1751. }
  1752. // source encoding is utf32
  1753. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1754. {
  1755. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1756. return (native_encoding == encoding) ?
  1757. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1758. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1759. }
  1760. // source encoding is latin1
  1761. if (encoding == encoding_latin1)
  1762. return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
  1763. assert(false && "Invalid encoding"); // unreachable
  1764. return false;
  1765. }
  1766. #else
  1767. template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1768. {
  1769. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1770. size_t data_length = size / sizeof(typename D::type);
  1771. // first pass: get length in utf8 units
  1772. size_t length = D::process(data, data_length, 0, utf8_counter());
  1773. // allocate buffer of suitable length
  1774. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1775. if (!buffer) return false;
  1776. // second pass: convert utf16 input to utf8
  1777. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1778. uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
  1779. assert(oend == obegin + length);
  1780. *oend = 0;
  1781. out_buffer = buffer;
  1782. out_length = length + 1;
  1783. return true;
  1784. }
  // Returns the number of leading bytes in data[0..size) whose value is at most 127,
  // i.e. the index of the first byte above 127, or size if there is none.
  PUGI_IMPL_FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
  {
      size_t prefix = 0;

      while (prefix < size && data[prefix] <= 127)
          ++prefix;

      return prefix;
  }
  1792. PUGI_IMPL_FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1793. {
  1794. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1795. size_t data_length = size;
  1796. // get size of prefix that does not need utf8 conversion
  1797. size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
  1798. assert(prefix_length <= data_length);
  1799. const uint8_t* postfix = data + prefix_length;
  1800. size_t postfix_length = data_length - prefix_length;
  1801. // if no conversion is needed, just return the original buffer
  1802. if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1803. // first pass: get length in utf8 units
  1804. size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
  1805. // allocate buffer of suitable length
  1806. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1807. if (!buffer) return false;
  1808. // second pass: convert latin1 input to utf8
  1809. memcpy(buffer, data, prefix_length);
  1810. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1811. uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
  1812. assert(oend == obegin + length);
  1813. *oend = 0;
  1814. out_buffer = buffer;
  1815. out_length = length + 1;
  1816. return true;
  1817. }
  1818. PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1819. {
  1820. // fast path: no conversion required
  1821. if (encoding == encoding_utf8)
  1822. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1823. // source encoding is utf16
  1824. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1825. {
  1826. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1827. return (native_encoding == encoding) ?
  1828. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1829. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1830. }
  1831. // source encoding is utf32
  1832. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1833. {
  1834. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1835. return (native_encoding == encoding) ?
  1836. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1837. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1838. }
  1839. // source encoding is latin1
  1840. if (encoding == encoding_latin1)
  1841. return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
  1842. assert(false && "Invalid encoding"); // unreachable
  1843. return false;
  1844. }
  1845. #endif
  1846. PUGI_IMPL_FN size_t as_utf8_begin(const wchar_t* str, size_t length)
  1847. {
  1848. // get length in utf8 characters
  1849. return wchar_decoder::process(str, length, 0, utf8_counter());
  1850. }
  1851. PUGI_IMPL_FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
  1852. {
  1853. // convert to utf8
  1854. uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
  1855. uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
  1856. assert(begin + size == end);
  1857. (void)!end;
  1858. (void)!size;
  1859. }
  1860. #ifndef PUGIXML_NO_STL
  1861. PUGI_IMPL_FN std::string as_utf8_impl(const wchar_t* str, size_t length)
  1862. {
  1863. // first pass: get length in utf8 characters
  1864. size_t size = as_utf8_begin(str, length);
  1865. // allocate resulting string
  1866. std::string result;
  1867. result.resize(size);
  1868. // second pass: convert to utf8
  1869. if (size > 0) as_utf8_end(&result[0], size, str, length);
  1870. return result;
  1871. }
  1872. PUGI_IMPL_FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
  1873. {
  1874. const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
  1875. // first pass: get length in wchar_t units
  1876. size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
  1877. // allocate resulting string
  1878. std::basic_string<wchar_t> result;
  1879. result.resize(length);
  1880. // second pass: convert to wchar_t
  1881. if (length > 0)
  1882. {
  1883. wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
  1884. wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
  1885. assert(begin + length == end);
  1886. (void)!end;
  1887. }
  1888. return result;
  1889. }
  1890. #endif
  1891. template <typename Header>
  1892. inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
  1893. {
  1894. // never reuse shared memory
  1895. if (header & xml_memory_page_contents_shared_mask) return false;
  1896. size_t target_length = strlength(target);
  1897. // always reuse document buffer memory if possible
  1898. if ((header & header_mask) == 0) return target_length >= length;
  1899. // reuse heap memory if waste is not too great
  1900. const size_t reuse_threshold = 32;
  1901. return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
  1902. }
  1903. template <typename String, typename Header>
  1904. PUGI_IMPL_FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
  1905. {
  1906. assert((header & header_mask) == 0 || dest); // header bit indicates whether dest was previously allocated
  1907. if (source_length == 0)
  1908. {
  1909. // empty string and null pointer are equivalent, so just deallocate old memory
  1910. xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator;
  1911. if (header & header_mask) alloc->deallocate_string(dest);
  1912. // mark the string as not allocated
  1913. dest = 0;
  1914. header &= ~header_mask;
  1915. return true;
  1916. }
  1917. else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
  1918. {
  1919. // we can reuse old buffer, so just copy the new data (including zero terminator)
  1920. memcpy(dest, source, source_length * sizeof(char_t));
  1921. dest[source_length] = 0;
  1922. return true;
  1923. }
  1924. else
  1925. {
  1926. xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator;
  1927. if (!alloc->reserve()) return false;
  1928. // allocate new buffer
  1929. char_t* buf = alloc->allocate_string(source_length + 1);
  1930. if (!buf) return false;
  1931. // copy the string (including zero terminator)
  1932. memcpy(buf, source, source_length * sizeof(char_t));
  1933. buf[source_length] = 0;
  1934. // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
  1935. if (header & header_mask) alloc->deallocate_string(dest);
  1936. // the string is now allocated, so set the flag
  1937. dest = buf;
  1938. header |= header_mask;
  1939. return true;
  1940. }
  1941. }
  1942. struct gap
  1943. {
  1944. char_t* end;
  1945. size_t size;
  1946. gap(): end(0), size(0)
  1947. {
  1948. }
  1949. // Push new gap, move s count bytes further (skipping the gap).
  1950. // Collapse previous gap.
  1951. void push(char_t*& s, size_t count)
  1952. {
  1953. if (end) // there was a gap already; collapse it
  1954. {
  1955. // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
  1956. assert(s >= end);
  1957. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1958. }
  1959. s += count; // end of current gap
  1960. // "merge" two gaps
  1961. end = s;
  1962. size += count;
  1963. }
  1964. // Collapse all gaps, return past-the-end pointer
  1965. char_t* flush(char_t* s)
  1966. {
  1967. if (end)
  1968. {
  1969. // Move [old_gap_end, current_pos) to [old_gap_start, ...)
  1970. assert(s >= end);
  1971. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1972. return s - size;
  1973. }
  1974. else return s;
  1975. }
  1976. };
  1977. PUGI_IMPL_FN char_t* strconv_escape(char_t* s, gap& g)
  1978. {
  1979. char_t* stre = s + 1;
  1980. switch (*stre)
  1981. {
  1982. case '#': // &#...
  1983. {
  1984. unsigned int ucsc = 0;
  1985. if (stre[1] == 'x') // &#x... (hex code)
  1986. {
  1987. stre += 2;
  1988. char_t ch = *stre;
  1989. if (ch == ';') return stre;
  1990. for (;;)
  1991. {
  1992. if (static_cast<unsigned int>(ch - '0') <= 9)
  1993. ucsc = 16 * ucsc + (ch - '0');
  1994. else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
  1995. ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
  1996. else if (ch == ';')
  1997. break;
  1998. else // cancel
  1999. return stre;
  2000. ch = *++stre;
  2001. }
  2002. ++stre;
  2003. }
  2004. else // &#... (dec code)
  2005. {
  2006. char_t ch = *++stre;
  2007. if (ch == ';') return stre;
  2008. for (;;)
  2009. {
  2010. if (static_cast<unsigned int>(ch - '0') <= 9)
  2011. ucsc = 10 * ucsc + (ch - '0');
  2012. else if (ch == ';')
  2013. break;
  2014. else // cancel
  2015. return stre;
  2016. ch = *++stre;
  2017. }
  2018. ++stre;
  2019. }
  2020. #ifdef PUGIXML_WCHAR_MODE
  2021. s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
  2022. #else
  2023. s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
  2024. #endif
  2025. g.push(s, stre - s);
  2026. return stre;
  2027. }
  2028. case 'a': // &a
  2029. {
  2030. ++stre;
  2031. if (*stre == 'm') // &am
  2032. {
  2033. if (*++stre == 'p' && *++stre == ';') // &amp;
  2034. {
  2035. *s++ = '&';
  2036. ++stre;
  2037. g.push(s, stre - s);
  2038. return stre;
  2039. }
  2040. }
  2041. else if (*stre == 'p') // &ap
  2042. {
  2043. if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
  2044. {
  2045. *s++ = '\'';
  2046. ++stre;
  2047. g.push(s, stre - s);
  2048. return stre;
  2049. }
  2050. }
  2051. break;
  2052. }
  2053. case 'g': // &g
  2054. {
  2055. if (*++stre == 't' && *++stre == ';') // &gt;
  2056. {
  2057. *s++ = '>';
  2058. ++stre;
  2059. g.push(s, stre - s);
  2060. return stre;
  2061. }
  2062. break;
  2063. }
  2064. case 'l': // &l
  2065. {
  2066. if (*++stre == 't' && *++stre == ';') // &lt;
  2067. {
  2068. *s++ = '<';
  2069. ++stre;
  2070. g.push(s, stre - s);
  2071. return stre;
  2072. }
  2073. break;
  2074. }
  2075. case 'q': // &q
  2076. {
  2077. if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
  2078. {
  2079. *s++ = '"';
  2080. ++stre;
  2081. g.push(s, stre - s);
  2082. return stre;
  2083. }
  2084. break;
  2085. }
  2086. default:
  2087. break;
  2088. }
  2089. return stre;
  2090. }
  // Parser utilities.
  // These macros expand textually and rely on names from the expansion site:
  // `s` (current position), `endch`, `optmsk`, `cursor`, `alloc`, `ch`,
  // `error_offset` and `error_status`.

  // True if c equals e, or if c is the zero terminator and endch equals e.
  #define PUGI_IMPL_ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))

  // Advances s past characters classified as ct_space.
  #define PUGI_IMPL_SKIPWS() { while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) ++s; }

  // Non-zero if any bit of OPT is set in optmsk.
  #define PUGI_IMPL_OPTSET(OPT) ( optmsk & (OPT) )

  // Appends a node of the given type under cursor and makes it the new cursor;
  // reports status_out_of_memory at s when the node cannot be created.
  #define PUGI_IMPL_PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s); }

  // Moves cursor to its parent node.
  #define PUGI_IMPL_POPNODE() { cursor = cursor->parent; }

  // Advances s until X holds or the zero terminator is reached.
  #define PUGI_IMPL_SCANFOR(X) { while (*s != 0 && !(X)) ++s; }

  // Advances s while X holds.
  #define PUGI_IMPL_SCANWHILE(X) { while (X) ++s; }

  // Same as SCANWHILE, but X must test the local `ss` (the character being examined);
  // the loop is unrolled to examine four characters per iteration.
  #define PUGI_IMPL_SCANWHILE_UNROLL(X) { for (;;) { \
      char_t ss = s[0]; if (PUGI_IMPL_UNLIKELY(!(X))) { break; } \
      ss = s[1]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 1; break; } \
      ss = s[2]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 2; break; } \
      ss = s[3]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 3; break; } \
      s += 4; } }

  // Saves the current character in ch, replaces it with a zero terminator and steps past it.
  #define PUGI_IMPL_ENDSEG() { ch = *s; *s = 0; ++s; }

  // Records the error position and status, then returns a null pointer from the enclosing function.
  #define PUGI_IMPL_THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)

  // Reports err at m if s is at the zero terminator.
  #define PUGI_IMPL_CHECK_ERROR(err, m) { if (*s == 0) PUGI_IMPL_THROW_ERROR(err, m); }
  2103. PUGI_IMPL_FN char_t* strconv_comment(char_t* s, char_t endch)
  2104. {
  2105. gap g;
  2106. while (true)
  2107. {
  2108. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_comment));
  2109. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2110. {
  2111. *s++ = '\n'; // replace first one with 0x0a
  2112. if (*s == '\n') g.push(s, 1);
  2113. }
  2114. else if (s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>')) // comment ends here
  2115. {
  2116. *g.flush(s) = 0;
  2117. return s + (s[2] == '>' ? 3 : 2);
  2118. }
  2119. else if (*s == 0)
  2120. {
  2121. return 0;
  2122. }
  2123. else ++s;
  2124. }
  2125. }
  2126. PUGI_IMPL_FN char_t* strconv_cdata(char_t* s, char_t endch)
  2127. {
  2128. gap g;
  2129. while (true)
  2130. {
  2131. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_cdata));
  2132. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2133. {
  2134. *s++ = '\n'; // replace first one with 0x0a
  2135. if (*s == '\n') g.push(s, 1);
  2136. }
  2137. else if (s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>')) // CDATA ends here
  2138. {
  2139. *g.flush(s) = 0;
  2140. return s + 1;
  2141. }
  2142. else if (*s == 0)
  2143. {
  2144. return 0;
  2145. }
  2146. else ++s;
  2147. }
  2148. }
  2149. typedef char_t* (*strconv_pcdata_t)(char_t*);
  2150. template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
  2151. {
  2152. static char_t* parse(char_t* s)
  2153. {
  2154. gap g;
  2155. char_t* begin = s;
  2156. while (true)
  2157. {
  2158. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_pcdata));
  2159. if (*s == '<') // PCDATA ends here
  2160. {
  2161. char_t* end = g.flush(s);
  2162. if (opt_trim::value)
  2163. while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space))
  2164. --end;
  2165. *end = 0;
  2166. return s + 1;
  2167. }
  2168. else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2169. {
  2170. *s++ = '\n'; // replace first one with 0x0a
  2171. if (*s == '\n') g.push(s, 1);
  2172. }
  2173. else if (opt_escape::value && *s == '&')
  2174. {
  2175. s = strconv_escape(s, g);
  2176. }
  2177. else if (*s == 0)
  2178. {
  2179. char_t* end = g.flush(s);
  2180. if (opt_trim::value)
  2181. while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space))
  2182. --end;
  2183. *end = 0;
  2184. return s;
  2185. }
  2186. else ++s;
  2187. }
  2188. }
  2189. };
  2190. PUGI_IMPL_FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
  2191. {
  2192. PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
  2193. switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
  2194. {
  2195. case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
  2196. case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
  2197. case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
  2198. case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
  2199. case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
  2200. case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
  2201. case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
  2202. case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
  2203. default: assert(false); return 0; // unreachable
  2204. }
  2205. }
  2206. typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
  2207. template <typename opt_escape> struct strconv_attribute_impl
  2208. {
  2209. static char_t* parse_wnorm(char_t* s, char_t end_quote)
  2210. {
  2211. gap g;
  2212. // trim leading whitespaces
  2213. if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
  2214. {
  2215. char_t* str = s;
  2216. do ++str;
  2217. while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space));
  2218. g.push(s, str - s);
  2219. }
  2220. while (true)
  2221. {
  2222. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
  2223. if (*s == end_quote)
  2224. {
  2225. char_t* str = g.flush(s);
  2226. do *str-- = 0;
  2227. while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space));
  2228. return s + 1;
  2229. }
  2230. else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
  2231. {
  2232. *s++ = ' ';
  2233. if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
  2234. {
  2235. char_t* str = s + 1;
  2236. while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space)) ++str;
  2237. g.push(s, str - s);
  2238. }
  2239. }
  2240. else if (opt_escape::value && *s == '&')
  2241. {
  2242. s = strconv_escape(s, g);
  2243. }
  2244. else if (!*s)
  2245. {
  2246. return 0;
  2247. }
  2248. else ++s;
  2249. }
  2250. }
  2251. static char_t* parse_wconv(char_t* s, char_t end_quote)
  2252. {
  2253. gap g;
  2254. while (true)
  2255. {
  2256. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws));
  2257. if (*s == end_quote)
  2258. {
  2259. *g.flush(s) = 0;
  2260. return s + 1;
  2261. }
  2262. else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
  2263. {
  2264. if (*s == '\r')
  2265. {
  2266. *s++ = ' ';
  2267. if (*s == '\n') g.push(s, 1);
  2268. }
  2269. else *s++ = ' ';
  2270. }
  2271. else if (opt_escape::value && *s == '&')
  2272. {
  2273. s = strconv_escape(s, g);
  2274. }
  2275. else if (!*s)
  2276. {
  2277. return 0;
  2278. }
  2279. else ++s;
  2280. }
  2281. }
  2282. static char_t* parse_eol(char_t* s, char_t end_quote)
  2283. {
  2284. gap g;
  2285. while (true)
  2286. {
  2287. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr));
  2288. if (*s == end_quote)
  2289. {
  2290. *g.flush(s) = 0;
  2291. return s + 1;
  2292. }
  2293. else if (*s == '\r')
  2294. {
  2295. *s++ = '\n';
  2296. if (*s == '\n') g.push(s, 1);
  2297. }
  2298. else if (opt_escape::value && *s == '&')
  2299. {
  2300. s = strconv_escape(s, g);
  2301. }
  2302. else if (!*s)
  2303. {
  2304. return 0;
  2305. }
  2306. else ++s;
  2307. }
  2308. }
  2309. static char_t* parse_simple(char_t* s, char_t end_quote)
  2310. {
  2311. gap g;
  2312. while (true)
  2313. {
  2314. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr));
  2315. if (*s == end_quote)
  2316. {
  2317. *g.flush(s) = 0;
  2318. return s + 1;
  2319. }
  2320. else if (opt_escape::value && *s == '&')
  2321. {
  2322. s = strconv_escape(s, g);
  2323. }
  2324. else if (!*s)
  2325. {
  2326. return 0;
  2327. }
  2328. else ++s;
  2329. }
  2330. }
  2331. };
  2332. PUGI_IMPL_FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
  2333. {
  2334. PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
  2335. switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
  2336. {
  2337. case 0: return strconv_attribute_impl<opt_false>::parse_simple;
  2338. case 1: return strconv_attribute_impl<opt_true>::parse_simple;
  2339. case 2: return strconv_attribute_impl<opt_false>::parse_eol;
  2340. case 3: return strconv_attribute_impl<opt_true>::parse_eol;
  2341. case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
  2342. case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
  2343. case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
  2344. case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
  2345. case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2346. case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2347. case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2348. case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2349. case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2350. case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2351. case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2352. case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2353. default: assert(false); return 0; // unreachable
  2354. }
  2355. }
  2356. inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
  2357. {
  2358. xml_parse_result result;
  2359. result.status = status;
  2360. result.offset = offset;
  2361. return result;
  2362. }
  2363. struct xml_parser
  2364. {
  2365. xml_allocator* alloc;
  2366. char_t* error_offset;
  2367. xml_parse_status error_status;
  2368. xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
  2369. {
  2370. }
  2371. // DOCTYPE consists of nested sections of the following possible types:
  2372. // <!-- ... -->, <? ... ?>, "...", '...'
  2373. // <![...]]>
  2374. // <!...>
  2375. // First group can not contain nested groups
  2376. // Second group can contain nested groups of the same type
  2377. // Third group can contain all other groups
  2378. char_t* parse_doctype_primitive(char_t* s)
  2379. {
  2380. if (*s == '"' || *s == '\'')
  2381. {
  2382. // quoted string
  2383. char_t ch = *s++;
  2384. PUGI_IMPL_SCANFOR(*s == ch);
  2385. if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2386. s++;
  2387. }
  2388. else if (s[0] == '<' && s[1] == '?')
  2389. {
  2390. // <? ... ?>
  2391. s += 2;
  2392. PUGI_IMPL_SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
  2393. if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2394. s += 2;
  2395. }
  2396. else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
  2397. {
  2398. s += 4;
  2399. PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
  2400. if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2401. s += 3;
  2402. }
  2403. else PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2404. return s;
  2405. }
  2406. char_t* parse_doctype_ignore(char_t* s)
  2407. {
  2408. size_t depth = 0;
  2409. assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
  2410. s += 3;
  2411. while (*s)
  2412. {
  2413. if (s[0] == '<' && s[1] == '!' && s[2] == '[')
  2414. {
  2415. // nested ignore section
  2416. s += 3;
  2417. depth++;
  2418. }
  2419. else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
  2420. {
  2421. // ignore section end
  2422. s += 3;
  2423. if (depth == 0)
  2424. return s;
  2425. depth--;
  2426. }
  2427. else s++;
  2428. }
  2429. PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2430. }
  2431. char_t* parse_doctype_group(char_t* s, char_t endch)
  2432. {
  2433. size_t depth = 0;
  2434. assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
  2435. s += 2;
  2436. while (*s)
  2437. {
  2438. if (s[0] == '<' && s[1] == '!' && s[2] != '-')
  2439. {
  2440. if (s[2] == '[')
  2441. {
  2442. // ignore
  2443. s = parse_doctype_ignore(s);
  2444. if (!s) return s;
  2445. }
  2446. else
  2447. {
  2448. // some control group
  2449. s += 2;
  2450. depth++;
  2451. }
  2452. }
  2453. else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
  2454. {
  2455. // unknown tag (forbidden), or some primitive group
  2456. s = parse_doctype_primitive(s);
  2457. if (!s) return s;
  2458. }
  2459. else if (*s == '>')
  2460. {
  2461. if (depth == 0)
  2462. return s;
  2463. depth--;
  2464. s++;
  2465. }
  2466. else s++;
  2467. }
  2468. if (depth != 0 || endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2469. return s;
  2470. }
  2471. char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
  2472. {
  2473. // parse node contents, starting with exclamation mark
  2474. ++s;
  2475. if (*s == '-') // '<!-...'
  2476. {
  2477. ++s;
  2478. if (*s == '-') // '<!--...'
  2479. {
  2480. ++s;
  2481. if (PUGI_IMPL_OPTSET(parse_comments))
  2482. {
  2483. PUGI_IMPL_PUSHNODE(node_comment); // Append a new node on the tree.
  2484. cursor->value = s; // Save the offset.
  2485. }
  2486. if (PUGI_IMPL_OPTSET(parse_eol) && PUGI_IMPL_OPTSET(parse_comments))
  2487. {
  2488. s = strconv_comment(s, endch);
  2489. if (!s) PUGI_IMPL_THROW_ERROR(status_bad_comment, cursor->value);
  2490. }
  2491. else
  2492. {
  2493. // Scan for terminating '-->'.
  2494. PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>'));
  2495. PUGI_IMPL_CHECK_ERROR(status_bad_comment, s);
  2496. if (PUGI_IMPL_OPTSET(parse_comments))
  2497. *s = 0; // Zero-terminate this segment at the first terminating '-'.
  2498. s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
  2499. }
  2500. }
  2501. else PUGI_IMPL_THROW_ERROR(status_bad_comment, s);
  2502. }
  2503. else if (*s == '[')
  2504. {
  2505. // '<![CDATA[...'
  2506. if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
  2507. {
  2508. ++s;
  2509. if (PUGI_IMPL_OPTSET(parse_cdata))
  2510. {
  2511. PUGI_IMPL_PUSHNODE(node_cdata); // Append a new node on the tree.
  2512. cursor->value = s; // Save the offset.
  2513. if (PUGI_IMPL_OPTSET(parse_eol))
  2514. {
  2515. s = strconv_cdata(s, endch);
  2516. if (!s) PUGI_IMPL_THROW_ERROR(status_bad_cdata, cursor->value);
  2517. }
  2518. else
  2519. {
  2520. // Scan for terminating ']]>'.
  2521. PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>'));
  2522. PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s);
  2523. *s++ = 0; // Zero-terminate this segment.
  2524. }
  2525. }
  2526. else // Flagged for discard, but we still have to scan for the terminator.
  2527. {
  2528. // Scan for terminating ']]>'.
  2529. PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>'));
  2530. PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s);
  2531. ++s;
  2532. }
  2533. s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
  2534. }
  2535. else PUGI_IMPL_THROW_ERROR(status_bad_cdata, s);
  2536. }
  2537. else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI_IMPL_ENDSWITH(s[6], 'E'))
  2538. {
  2539. s -= 2;
  2540. if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
  2541. char_t* mark = s + 9;
  2542. s = parse_doctype_group(s, endch);
  2543. if (!s) return s;
  2544. assert((*s == 0 && endch == '>') || *s == '>');
  2545. if (*s) *s++ = 0;
  2546. if (PUGI_IMPL_OPTSET(parse_doctype))
  2547. {
  2548. while (PUGI_IMPL_IS_CHARTYPE(*mark, ct_space)) ++mark;
  2549. PUGI_IMPL_PUSHNODE(node_doctype);
  2550. cursor->value = mark;
  2551. }
  2552. }
  2553. else if (*s == 0 && endch == '-') PUGI_IMPL_THROW_ERROR(status_bad_comment, s);
  2554. else if (*s == 0 && endch == '[') PUGI_IMPL_THROW_ERROR(status_bad_cdata, s);
  2555. else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s);
  2556. return s;
  2557. }
  2558. char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
  2559. {
  2560. // load into registers
  2561. xml_node_struct* cursor = ref_cursor;
  2562. char_t ch = 0;
  2563. // parse node contents, starting with question mark
  2564. ++s;
  2565. // read PI target
  2566. char_t* target = s;
  2567. if (!PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
  2568. PUGI_IMPL_SCANWHILE(PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol));
  2569. PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);
  2570. // determine node type; stricmp / strcasecmp is not portable
  2571. bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
  2572. if (declaration ? PUGI_IMPL_OPTSET(parse_declaration) : PUGI_IMPL_OPTSET(parse_pi))
  2573. {
  2574. if (declaration)
  2575. {
  2576. // disallow non top-level declarations
  2577. if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
  2578. PUGI_IMPL_PUSHNODE(node_declaration);
  2579. }
  2580. else
  2581. {
  2582. PUGI_IMPL_PUSHNODE(node_pi);
  2583. }
  2584. cursor->name = target;
  2585. PUGI_IMPL_ENDSEG();
  2586. // parse value/attributes
  2587. if (ch == '?')
  2588. {
  2589. // empty node
  2590. if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
  2591. s += (*s == '>');
  2592. PUGI_IMPL_POPNODE();
  2593. }
  2594. else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
  2595. {
  2596. PUGI_IMPL_SKIPWS();
  2597. // scan for tag end
  2598. char_t* value = s;
  2599. PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>'));
  2600. PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);
  2601. if (declaration)
  2602. {
  2603. // replace ending ? with / so that 'element' terminates properly
  2604. *s = '/';
  2605. // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
  2606. s = value;
  2607. }
  2608. else
  2609. {
  2610. // store value and step over >
  2611. cursor->value = value;
  2612. PUGI_IMPL_POPNODE();
  2613. PUGI_IMPL_ENDSEG();
  2614. s += (*s == '>');
  2615. }
  2616. }
  2617. else PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
  2618. }
  2619. else
  2620. {
  2621. // scan for tag end
  2622. PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>'));
  2623. PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);
  2624. s += (s[1] == '>' ? 2 : 1);
  2625. }
  2626. // store from registers
  2627. ref_cursor = cursor;
  2628. return s;
  2629. }
  2630. char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
  2631. {
  2632. strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
  2633. strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
  2634. char_t ch = 0;
  2635. xml_node_struct* cursor = root;
  2636. char_t* mark = s;
  2637. char_t* merged_pcdata = s;
  2638. while (*s != 0)
  2639. {
  2640. if (*s == '<')
  2641. {
  2642. ++s;
  2643. LOC_TAG:
  2644. if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
  2645. {
  2646. PUGI_IMPL_PUSHNODE(node_element); // Append a new node to the tree.
  2647. cursor->name = s;
  2648. PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2649. PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over.
  2650. if (ch == '>')
  2651. {
  2652. // end of tag
  2653. }
  2654. else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
  2655. {
  2656. LOC_ATTRIBUTES:
  2657. while (true)
  2658. {
  2659. PUGI_IMPL_SKIPWS(); // Eat any whitespace.
  2660. if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
  2661. {
  2662. xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
  2663. if (!a) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s);
  2664. a->name = s; // Save the offset.
  2665. PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
  2666. PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over.
  2667. if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
  2668. {
  2669. PUGI_IMPL_SKIPWS(); // Eat any whitespace.
  2670. ch = *s;
  2671. ++s;
  2672. }
  2673. if (ch == '=') // '<... #=...'
  2674. {
  2675. PUGI_IMPL_SKIPWS(); // Eat any whitespace.
  2676. if (*s == '"' || *s == '\'') // '<... #="...'
  2677. {
  2678. ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
  2679. ++s; // Step over the quote.
  2680. a->value = s; // Save the offset.
  2681. s = strconv_attribute(s, ch);
  2682. if (!s) PUGI_IMPL_THROW_ERROR(status_bad_attribute, a->value);
  2683. // After this line the loop continues from the start;
  2684. // Whitespaces, / and > are ok, symbols and EOF are wrong,
  2685. // everything else will be detected
  2686. if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
  2687. }
  2688. else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
  2689. }
  2690. else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
  2691. }
  2692. else if (*s == '/')
  2693. {
  2694. ++s;
  2695. if (*s == '>')
  2696. {
  2697. PUGI_IMPL_POPNODE();
  2698. s++;
  2699. break;
  2700. }
  2701. else if (*s == 0 && endch == '>')
  2702. {
  2703. PUGI_IMPL_POPNODE();
  2704. break;
  2705. }
  2706. else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
  2707. }
  2708. else if (*s == '>')
  2709. {
  2710. ++s;
  2711. break;
  2712. }
  2713. else if (*s == 0 && endch == '>')
  2714. {
  2715. break;
  2716. }
  2717. else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
  2718. }
  2719. // !!!
  2720. }
  2721. else if (ch == '/') // '<#.../'
  2722. {
  2723. if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
  2724. PUGI_IMPL_POPNODE(); // Pop.
  2725. s += (*s == '>');
  2726. }
  2727. else if (ch == 0)
  2728. {
  2729. // we stepped over null terminator, backtrack & handle closing tag
  2730. --s;
  2731. if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
  2732. }
  2733. else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
  2734. }
  2735. else if (*s == '/')
  2736. {
  2737. ++s;
  2738. mark = s;
  2739. char_t* name = cursor->name;
  2740. if (!name) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
  2741. while (PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol))
  2742. {
  2743. if (*s++ != *name++) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
  2744. }
  2745. if (*name)
  2746. {
  2747. if (*s == 0 && name[0] == endch && name[1] == 0) PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
  2748. else PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
  2749. }
  2750. PUGI_IMPL_POPNODE(); // Pop.
  2751. PUGI_IMPL_SKIPWS();
  2752. if (*s == 0)
  2753. {
  2754. if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
  2755. }
  2756. else
  2757. {
  2758. if (*s != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
  2759. ++s;
  2760. }
  2761. }
  2762. else if (*s == '?') // '<?...'
  2763. {
  2764. s = parse_question(s, cursor, optmsk, endch);
  2765. if (!s) return s;
  2766. assert(cursor);
  2767. if (PUGI_IMPL_NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
  2768. }
  2769. else if (*s == '!') // '<!...'
  2770. {
  2771. s = parse_exclamation(s, cursor, optmsk, endch);
  2772. if (!s) return s;
  2773. }
  2774. else if (*s == 0 && endch == '?') PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
  2775. else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s);
  2776. }
  2777. else
  2778. {
  2779. mark = s; // Save this offset while searching for a terminator.
  2780. PUGI_IMPL_SKIPWS(); // Eat whitespace if no genuine PCDATA here.
  2781. if (*s == '<' || !*s)
  2782. {
  2783. // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
  2784. assert(mark != s);
  2785. if (!PUGI_IMPL_OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI_IMPL_OPTSET(parse_trim_pcdata))
  2786. {
  2787. continue;
  2788. }
  2789. else if (PUGI_IMPL_OPTSET(parse_ws_pcdata_single))
  2790. {
  2791. if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
  2792. }
  2793. }
  2794. if (!PUGI_IMPL_OPTSET(parse_trim_pcdata))
  2795. s = mark;
  2796. if (cursor->parent || PUGI_IMPL_OPTSET(parse_fragment))
  2797. {
  2798. char_t* parsed_pcdata = s;
  2799. s = strconv_pcdata(s);
  2800. if (PUGI_IMPL_OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
  2801. {
  2802. cursor->value = parsed_pcdata; // Save the offset.
  2803. }
  2804. else if (PUGI_IMPL_OPTSET(parse_merge_pcdata) && cursor->first_child && PUGI_IMPL_NODETYPE(cursor->first_child->prev_sibling_c) == node_pcdata)
  2805. {
  2806. assert(merged_pcdata >= cursor->first_child->prev_sibling_c->value);
  2807. // Catch up to the end of last parsed value; only needed for the first fragment.
  2808. merged_pcdata += strlength(merged_pcdata);
  2809. size_t length = strlength(parsed_pcdata);
  2810. // Must use memmove instead of memcpy as this move may overlap
  2811. memmove(merged_pcdata, parsed_pcdata, (length + 1) * sizeof(char_t));
  2812. merged_pcdata += length;
  2813. }
  2814. else
  2815. {
  2816. xml_node_struct* prev_cursor = cursor;
  2817. PUGI_IMPL_PUSHNODE(node_pcdata); // Append a new node on the tree.
  2818. cursor->value = parsed_pcdata; // Save the offset.
  2819. merged_pcdata = parsed_pcdata; // Used for parse_merge_pcdata above, cheaper to save unconditionally
  2820. cursor = prev_cursor; // Pop since this is a standalone.
  2821. }
  2822. if (!*s) break;
  2823. }
  2824. else
  2825. {
  2826. PUGI_IMPL_SCANFOR(*s == '<'); // '...<'
  2827. if (!*s) break;
  2828. ++s;
  2829. }
  2830. // We're after '<'
  2831. goto LOC_TAG;
  2832. }
  2833. }
  2834. // check that last tag is closed
  2835. if (cursor != root) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, s);
  2836. return s;
  2837. }
  2838. #ifdef PUGIXML_WCHAR_MODE
  2839. static char_t* parse_skip_bom(char_t* s)
  2840. {
  2841. unsigned int bom = 0xfeff;
  2842. return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
  2843. }
  2844. #else
  2845. static char_t* parse_skip_bom(char_t* s)
  2846. {
  2847. return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
  2848. }
  2849. #endif
  2850. static bool has_element_node_siblings(xml_node_struct* node)
  2851. {
  2852. while (node)
  2853. {
  2854. if (PUGI_IMPL_NODETYPE(node) == node_element) return true;
  2855. node = node->next_sibling;
  2856. }
  2857. return false;
  2858. }
  2859. static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
  2860. {
  2861. // early-out for empty documents
  2862. if (length == 0)
  2863. return make_parse_result(PUGI_IMPL_OPTSET(parse_fragment) ? status_ok : status_no_document_element);
  2864. // get last child of the root before parsing
  2865. xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
  2866. // create parser on stack
  2867. xml_parser parser(static_cast<xml_allocator*>(xmldoc));
  2868. // save last character and make buffer zero-terminated (speeds up parsing)
  2869. char_t endch = buffer[length - 1];
  2870. buffer[length - 1] = 0;
  2871. // skip BOM to make sure it does not end up as part of parse output
  2872. char_t* buffer_data = parse_skip_bom(buffer);
  2873. // perform actual parsing
  2874. parser.parse_tree(buffer_data, root, optmsk, endch);
  2875. xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
  2876. assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
  2877. if (result)
  2878. {
  2879. // since we removed last character, we have to handle the only possible false positive (stray <)
  2880. if (endch == '<')
  2881. return make_parse_result(status_unrecognized_tag, length - 1);
  2882. // check if there are any element nodes parsed
  2883. xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child + 0;
  2884. if (!PUGI_IMPL_OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
  2885. return make_parse_result(status_no_document_element, length - 1);
  2886. }
  2887. else
  2888. {
  2889. // roll back offset if it occurs on a null terminator in the source buffer
  2890. if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
  2891. result.offset--;
  2892. }
  2893. return result;
  2894. }
  2895. };
  2896. // Output facilities
  2897. PUGI_IMPL_FN xml_encoding get_write_native_encoding()
  2898. {
  2899. #ifdef PUGIXML_WCHAR_MODE
  2900. return get_wchar_encoding();
  2901. #else
  2902. return encoding_utf8;
  2903. #endif
  2904. }
  2905. PUGI_IMPL_FN xml_encoding get_write_encoding(xml_encoding encoding)
  2906. {
  2907. // replace wchar encoding with utf implementation
  2908. if (encoding == encoding_wchar) return get_wchar_encoding();
  2909. // replace utf16 encoding with utf16 with specific endianness
  2910. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2911. // replace utf32 encoding with utf32 with specific endianness
  2912. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2913. // only do autodetection if no explicit encoding is requested
  2914. if (encoding != encoding_auto) return encoding;
  2915. // assume utf8 encoding
  2916. return encoding_utf8;
  2917. }
  2918. template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
  2919. {
  2920. PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2921. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2922. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2923. }
  2924. template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
  2925. {
  2926. PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2927. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2928. if (opt_swap)
  2929. {
  2930. for (typename T::value_type i = dest; i != end; ++i)
  2931. *i = endian_swap(*i);
  2932. }
  2933. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2934. }
  2935. #ifdef PUGIXML_WCHAR_MODE
  2936. PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length)
  2937. {
  2938. if (length < 1) return 0;
  2939. // discard last character if it's the lead of a surrogate pair
  2940. return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
  2941. }
  2942. PUGI_IMPL_FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2943. {
  2944. // only endian-swapping is required
  2945. if (need_endian_swap_utf(encoding, get_wchar_encoding()))
  2946. {
  2947. convert_wchar_endian_swap(r_char, data, length);
  2948. return length * sizeof(char_t);
  2949. }
  2950. // convert to utf8
  2951. if (encoding == encoding_utf8)
  2952. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
  2953. // convert to utf16
  2954. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2955. {
  2956. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2957. return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
  2958. }
  2959. // convert to utf32
  2960. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2961. {
  2962. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2963. return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
  2964. }
  2965. // convert to latin1
  2966. if (encoding == encoding_latin1)
  2967. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
  2968. assert(false && "Invalid encoding"); // unreachable
  2969. return 0;
  2970. }
  2971. #else
  2972. PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length)
  2973. {
  2974. if (length < 5) return 0;
  2975. for (size_t i = 1; i <= 4; ++i)
  2976. {
  2977. uint8_t ch = static_cast<uint8_t>(data[length - i]);
  2978. // either a standalone character or a leading one
  2979. if ((ch & 0xc0) != 0x80) return length - i;
  2980. }
  2981. // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
  2982. return length;
  2983. }
  2984. PUGI_IMPL_FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2985. {
  2986. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2987. {
  2988. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2989. return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
  2990. }
  2991. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2992. {
  2993. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2994. return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
  2995. }
  2996. if (encoding == encoding_latin1)
  2997. return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
  2998. assert(false && "Invalid encoding"); // unreachable
  2999. return 0;
  3000. }
  3001. #endif
  3002. class xml_buffered_writer
  3003. {
  3004. xml_buffered_writer(const xml_buffered_writer&);
  3005. xml_buffered_writer& operator=(const xml_buffered_writer&);
  3006. public:
  3007. xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
  3008. {
  3009. PUGI_IMPL_STATIC_ASSERT(bufcapacity >= 8);
  3010. }
  3011. size_t flush()
  3012. {
  3013. flush(buffer, bufsize);
  3014. bufsize = 0;
  3015. return 0;
  3016. }
  3017. void flush(const char_t* data, size_t size)
  3018. {
  3019. if (size == 0) return;
  3020. // fast path, just write data
  3021. if (encoding == get_write_native_encoding())
  3022. writer.write(data, size * sizeof(char_t));
  3023. else
  3024. {
  3025. // convert chunk
  3026. size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
  3027. assert(result <= sizeof(scratch));
  3028. // write data
  3029. writer.write(scratch.data_u8, result);
  3030. }
  3031. }
  3032. void write_direct(const char_t* data, size_t length)
  3033. {
  3034. // flush the remaining buffer contents
  3035. flush();
  3036. // handle large chunks
  3037. if (length > bufcapacity)
  3038. {
  3039. if (encoding == get_write_native_encoding())
  3040. {
  3041. // fast path, can just write data chunk
  3042. writer.write(data, length * sizeof(char_t));
  3043. return;
  3044. }
  3045. // need to convert in suitable chunks
  3046. while (length > bufcapacity)
  3047. {
  3048. // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
  3049. // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
  3050. size_t chunk_size = get_valid_length(data, bufcapacity);
  3051. assert(chunk_size);
  3052. // convert chunk and write
  3053. flush(data, chunk_size);
  3054. // iterate
  3055. data += chunk_size;
  3056. length -= chunk_size;
  3057. }
  3058. // small tail is copied below
  3059. bufsize = 0;
  3060. }
  3061. memcpy(buffer + bufsize, data, length * sizeof(char_t));
  3062. bufsize += length;
  3063. }
  3064. void write_buffer(const char_t* data, size_t length)
  3065. {
  3066. size_t offset = bufsize;
  3067. if (offset + length <= bufcapacity)
  3068. {
  3069. memcpy(buffer + offset, data, length * sizeof(char_t));
  3070. bufsize = offset + length;
  3071. }
  3072. else
  3073. {
  3074. write_direct(data, length);
  3075. }
  3076. }
  3077. void write_string(const char_t* data)
  3078. {
  3079. // write the part of the string that fits in the buffer
  3080. size_t offset = bufsize;
  3081. while (*data && offset < bufcapacity)
  3082. buffer[offset++] = *data++;
  3083. // write the rest
  3084. if (offset < bufcapacity)
  3085. {
  3086. bufsize = offset;
  3087. }
  3088. else
  3089. {
  3090. // backtrack a bit if we have split the codepoint
  3091. size_t length = offset - bufsize;
  3092. size_t extra = length - get_valid_length(data - length, length);
  3093. bufsize = offset - extra;
  3094. write_direct(data - extra, strlength(data) + extra);
  3095. }
  3096. }
  3097. void write(char_t d0)
  3098. {
  3099. size_t offset = bufsize;
  3100. if (offset > bufcapacity - 1) offset = flush();
  3101. buffer[offset + 0] = d0;
  3102. bufsize = offset + 1;
  3103. }
  3104. void write(char_t d0, char_t d1)
  3105. {
  3106. size_t offset = bufsize;
  3107. if (offset > bufcapacity - 2) offset = flush();
  3108. buffer[offset + 0] = d0;
  3109. buffer[offset + 1] = d1;
  3110. bufsize = offset + 2;
  3111. }
  3112. void write(char_t d0, char_t d1, char_t d2)
  3113. {
  3114. size_t offset = bufsize;
  3115. if (offset > bufcapacity - 3) offset = flush();
  3116. buffer[offset + 0] = d0;
  3117. buffer[offset + 1] = d1;
  3118. buffer[offset + 2] = d2;
  3119. bufsize = offset + 3;
  3120. }
  3121. void write(char_t d0, char_t d1, char_t d2, char_t d3)
  3122. {
  3123. size_t offset = bufsize;
  3124. if (offset > bufcapacity - 4) offset = flush();
  3125. buffer[offset + 0] = d0;
  3126. buffer[offset + 1] = d1;
  3127. buffer[offset + 2] = d2;
  3128. buffer[offset + 3] = d3;
  3129. bufsize = offset + 4;
  3130. }
  3131. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
  3132. {
  3133. size_t offset = bufsize;
  3134. if (offset > bufcapacity - 5) offset = flush();
  3135. buffer[offset + 0] = d0;
  3136. buffer[offset + 1] = d1;
  3137. buffer[offset + 2] = d2;
  3138. buffer[offset + 3] = d3;
  3139. buffer[offset + 4] = d4;
  3140. bufsize = offset + 5;
  3141. }
  3142. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
  3143. {
  3144. size_t offset = bufsize;
  3145. if (offset > bufcapacity - 6) offset = flush();
  3146. buffer[offset + 0] = d0;
  3147. buffer[offset + 1] = d1;
  3148. buffer[offset + 2] = d2;
  3149. buffer[offset + 3] = d3;
  3150. buffer[offset + 4] = d4;
  3151. buffer[offset + 5] = d5;
  3152. bufsize = offset + 6;
  3153. }
  3154. // utf8 maximum expansion: x4 (-> utf32)
  3155. // utf16 maximum expansion: x2 (-> utf32)
  3156. // utf32 maximum expansion: x1
  3157. enum
  3158. {
  3159. bufcapacitybytes =
  3160. #ifdef PUGIXML_MEMORY_OUTPUT_STACK
  3161. PUGIXML_MEMORY_OUTPUT_STACK
  3162. #else
  3163. 10240
  3164. #endif
  3165. ,
  3166. bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
  3167. };
  3168. char_t buffer[bufcapacity];
  3169. union
  3170. {
  3171. uint8_t data_u8[4 * bufcapacity];
  3172. uint16_t data_u16[2 * bufcapacity];
  3173. uint32_t data_u32[bufcapacity];
  3174. char_t data_char[bufcapacity];
  3175. } scratch;
  3176. xml_writer& writer;
  3177. size_t bufsize;
  3178. xml_encoding encoding;
  3179. };
  3180. PUGI_IMPL_FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3181. {
  3182. while (*s)
  3183. {
  3184. const char_t* prev = s;
  3185. // While *s is a usual symbol
  3186. PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPEX(ss, type));
  3187. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3188. switch (*s)
  3189. {
  3190. case 0: break;
  3191. case '&':
  3192. writer.write('&', 'a', 'm', 'p', ';');
  3193. ++s;
  3194. break;
  3195. case '<':
  3196. writer.write('&', 'l', 't', ';');
  3197. ++s;
  3198. break;
  3199. case '>':
  3200. writer.write('&', 'g', 't', ';');
  3201. ++s;
  3202. break;
  3203. case '"':
  3204. if (flags & format_attribute_single_quote)
  3205. writer.write('"');
  3206. else
  3207. writer.write('&', 'q', 'u', 'o', 't', ';');
  3208. ++s;
  3209. break;
  3210. case '\'':
  3211. if (flags & format_attribute_single_quote)
  3212. writer.write('&', 'a', 'p', 'o', 's', ';');
  3213. else
  3214. writer.write('\'');
  3215. ++s;
  3216. break;
  3217. default: // s is not a usual symbol
  3218. {
  3219. unsigned int ch = static_cast<unsigned int>(*s++);
  3220. assert(ch < 32);
  3221. if (!(flags & format_skip_control_chars))
  3222. writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
  3223. }
  3224. }
  3225. }
  3226. }
  3227. PUGI_IMPL_FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3228. {
  3229. if (flags & format_no_escapes)
  3230. writer.write_string(s);
  3231. else
  3232. text_output_escaped(writer, s, type, flags);
  3233. }
  3234. PUGI_IMPL_FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
  3235. {
  3236. do
  3237. {
  3238. writer.write('<', '!', '[', 'C', 'D');
  3239. writer.write('A', 'T', 'A', '[');
  3240. const char_t* prev = s;
  3241. // look for ]]> sequence - we can't output it as is since it terminates CDATA
  3242. while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
  3243. // skip ]] if we stopped at ]]>, > will go to the next CDATA section
  3244. if (*s) s += 2;
  3245. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3246. writer.write(']', ']', '>');
  3247. }
  3248. while (*s);
  3249. }
  3250. PUGI_IMPL_FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
  3251. {
  3252. switch (indent_length)
  3253. {
  3254. case 1:
  3255. {
  3256. for (unsigned int i = 0; i < depth; ++i)
  3257. writer.write(indent[0]);
  3258. break;
  3259. }
  3260. case 2:
  3261. {
  3262. for (unsigned int i = 0; i < depth; ++i)
  3263. writer.write(indent[0], indent[1]);
  3264. break;
  3265. }
  3266. case 3:
  3267. {
  3268. for (unsigned int i = 0; i < depth; ++i)
  3269. writer.write(indent[0], indent[1], indent[2]);
  3270. break;
  3271. }
  3272. case 4:
  3273. {
  3274. for (unsigned int i = 0; i < depth; ++i)
  3275. writer.write(indent[0], indent[1], indent[2], indent[3]);
  3276. break;
  3277. }
  3278. default:
  3279. {
  3280. for (unsigned int i = 0; i < depth; ++i)
  3281. writer.write_buffer(indent, indent_length);
  3282. }
  3283. }
  3284. }
  3285. PUGI_IMPL_FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
  3286. {
  3287. writer.write('<', '!', '-', '-');
  3288. while (*s)
  3289. {
  3290. const char_t* prev = s;
  3291. // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
  3292. while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
  3293. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3294. if (*s)
  3295. {
  3296. assert(*s == '-');
  3297. writer.write('-', ' ');
  3298. ++s;
  3299. }
  3300. }
  3301. writer.write('-', '-', '>');
  3302. }
  3303. PUGI_IMPL_FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
  3304. {
  3305. while (*s)
  3306. {
  3307. const char_t* prev = s;
  3308. // look for ?> sequence - we can't output it since ?> terminates PI
  3309. while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
  3310. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3311. if (*s)
  3312. {
  3313. assert(s[0] == '?' && s[1] == '>');
  3314. writer.write('?', ' ', '>');
  3315. s += 2;
  3316. }
  3317. }
  3318. }
  3319. PUGI_IMPL_FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3320. {
  3321. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3322. const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
  3323. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3324. {
  3325. if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
  3326. {
  3327. writer.write('\n');
  3328. text_output_indent(writer, indent, indent_length, depth + 1);
  3329. }
  3330. else
  3331. {
  3332. writer.write(' ');
  3333. }
  3334. writer.write_string(a->name ? a->name + 0 : default_name);
  3335. writer.write('=', enquotation_char);
  3336. if (a->value)
  3337. text_output(writer, a->value, ctx_special_attr, flags);
  3338. writer.write(enquotation_char);
  3339. }
  3340. }
  3341. PUGI_IMPL_FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3342. {
  3343. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3344. const char_t* name = node->name ? node->name + 0 : default_name;
  3345. writer.write('<');
  3346. writer.write_string(name);
  3347. if (node->first_attribute)
  3348. node_output_attributes(writer, node, indent, indent_length, flags, depth);
  3349. // element nodes can have value if parse_embed_pcdata was used
  3350. if (!node->value)
  3351. {
  3352. if (!node->first_child)
  3353. {
  3354. if (flags & format_no_empty_element_tags)
  3355. {
  3356. writer.write('>', '<', '/');
  3357. writer.write_string(name);
  3358. writer.write('>');
  3359. return false;
  3360. }
  3361. else
  3362. {
  3363. if ((flags & format_raw) == 0)
  3364. writer.write(' ');
  3365. writer.write('/', '>');
  3366. return false;
  3367. }
  3368. }
  3369. else
  3370. {
  3371. writer.write('>');
  3372. return true;
  3373. }
  3374. }
  3375. else
  3376. {
  3377. writer.write('>');
  3378. text_output(writer, node->value, ctx_special_pcdata, flags);
  3379. if (!node->first_child)
  3380. {
  3381. writer.write('<', '/');
  3382. writer.write_string(name);
  3383. writer.write('>');
  3384. return false;
  3385. }
  3386. else
  3387. {
  3388. return true;
  3389. }
  3390. }
  3391. }
  3392. PUGI_IMPL_FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
  3393. {
  3394. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3395. const char_t* name = node->name ? node->name + 0 : default_name;
  3396. writer.write('<', '/');
  3397. writer.write_string(name);
  3398. writer.write('>');
  3399. }
  3400. PUGI_IMPL_FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
  3401. {
  3402. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3403. switch (PUGI_IMPL_NODETYPE(node))
  3404. {
  3405. case node_pcdata:
  3406. text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
  3407. break;
  3408. case node_cdata:
  3409. text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3410. break;
  3411. case node_comment:
  3412. node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3413. break;
  3414. case node_pi:
  3415. writer.write('<', '?');
  3416. writer.write_string(node->name ? node->name + 0 : default_name);
  3417. if (node->value)
  3418. {
  3419. writer.write(' ');
  3420. node_output_pi_value(writer, node->value);
  3421. }
  3422. writer.write('?', '>');
  3423. break;
  3424. case node_declaration:
  3425. writer.write('<', '?');
  3426. writer.write_string(node->name ? node->name + 0 : default_name);
  3427. node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
  3428. writer.write('?', '>');
  3429. break;
  3430. case node_doctype:
  3431. writer.write('<', '!', 'D', 'O', 'C');
  3432. writer.write('T', 'Y', 'P', 'E');
  3433. if (node->value)
  3434. {
  3435. writer.write(' ');
  3436. writer.write_string(node->value);
  3437. }
  3438. writer.write('>');
  3439. break;
  3440. default:
  3441. assert(false && "Invalid node type"); // unreachable
  3442. }
  3443. }
  3444. enum indent_flags_t
  3445. {
  3446. indent_newline = 1,
  3447. indent_indent = 2
  3448. };
  3449. PUGI_IMPL_FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
  3450. {
  3451. size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
  3452. unsigned int indent_flags = indent_indent;
  3453. xml_node_struct* node = root;
  3454. do
  3455. {
  3456. assert(node);
  3457. // begin writing current node
  3458. if (PUGI_IMPL_NODETYPE(node) == node_pcdata || PUGI_IMPL_NODETYPE(node) == node_cdata)
  3459. {
  3460. node_output_simple(writer, node, flags);
  3461. indent_flags = 0;
  3462. }
  3463. else
  3464. {
  3465. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3466. writer.write('\n');
  3467. if ((indent_flags & indent_indent) && indent_length)
  3468. text_output_indent(writer, indent, indent_length, depth);
  3469. if (PUGI_IMPL_NODETYPE(node) == node_element)
  3470. {
  3471. indent_flags = indent_newline | indent_indent;
  3472. if (node_output_start(writer, node, indent, indent_length, flags, depth))
  3473. {
  3474. // element nodes can have value if parse_embed_pcdata was used
  3475. if (node->value)
  3476. indent_flags = 0;
  3477. node = node->first_child;
  3478. depth++;
  3479. continue;
  3480. }
  3481. }
  3482. else if (PUGI_IMPL_NODETYPE(node) == node_document)
  3483. {
  3484. indent_flags = indent_indent;
  3485. if (node->first_child)
  3486. {
  3487. node = node->first_child;
  3488. continue;
  3489. }
  3490. }
  3491. else
  3492. {
  3493. node_output_simple(writer, node, flags);
  3494. indent_flags = indent_newline | indent_indent;
  3495. }
  3496. }
  3497. // continue to the next node
  3498. while (node != root)
  3499. {
  3500. if (node->next_sibling)
  3501. {
  3502. node = node->next_sibling;
  3503. break;
  3504. }
  3505. node = node->parent;
  3506. // write closing node
  3507. if (PUGI_IMPL_NODETYPE(node) == node_element)
  3508. {
  3509. depth--;
  3510. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3511. writer.write('\n');
  3512. if ((indent_flags & indent_indent) && indent_length)
  3513. text_output_indent(writer, indent, indent_length, depth);
  3514. node_output_end(writer, node);
  3515. indent_flags = indent_newline | indent_indent;
  3516. }
  3517. }
  3518. }
  3519. while (node != root);
  3520. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3521. writer.write('\n');
  3522. }
  3523. PUGI_IMPL_FN bool has_declaration(xml_node_struct* node)
  3524. {
  3525. for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
  3526. {
  3527. xml_node_type type = PUGI_IMPL_NODETYPE(child);
  3528. if (type == node_declaration) return true;
  3529. if (type == node_element) return false;
  3530. }
  3531. return false;
  3532. }
  3533. PUGI_IMPL_FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
  3534. {
  3535. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3536. if (a == attr)
  3537. return true;
  3538. return false;
  3539. }
  3540. PUGI_IMPL_FN bool allow_insert_attribute(xml_node_type parent)
  3541. {
  3542. return parent == node_element || parent == node_declaration;
  3543. }
  3544. PUGI_IMPL_FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
  3545. {
  3546. if (parent != node_document && parent != node_element) return false;
  3547. if (child == node_document || child == node_null) return false;
  3548. if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
  3549. return true;
  3550. }
  3551. PUGI_IMPL_FN bool allow_move(xml_node parent, xml_node child)
  3552. {
  3553. // check that child can be a child of parent
  3554. if (!allow_insert_child(parent.type(), child.type()))
  3555. return false;
  3556. // check that node is not moved between documents
  3557. if (parent.root() != child.root())
  3558. return false;
  3559. // check that new parent is not in the child subtree
  3560. xml_node cur = parent;
  3561. while (cur)
  3562. {
  3563. if (cur == child)
  3564. return false;
  3565. cur = cur.parent();
  3566. }
  3567. return true;
  3568. }
  3569. template <typename String, typename Header>
  3570. PUGI_IMPL_FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
  3571. {
  3572. assert(!dest && (header & header_mask) == 0); // copies are performed into fresh nodes
  3573. if (source)
  3574. {
  3575. if (alloc && (source_header & header_mask) == 0)
  3576. {
  3577. dest = source;
  3578. // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
  3579. header |= xml_memory_page_contents_shared_mask;
  3580. source_header |= xml_memory_page_contents_shared_mask;
  3581. }
  3582. else
  3583. strcpy_insitu(dest, header, header_mask, source, strlength(source));
  3584. }
  3585. }
  3586. PUGI_IMPL_FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
  3587. {
  3588. node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
  3589. node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
  3590. for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
  3591. {
  3592. xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
  3593. if (da)
  3594. {
  3595. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3596. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3597. }
  3598. }
  3599. }
  3600. PUGI_IMPL_FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
  3601. {
  3602. xml_allocator& alloc = get_allocator(dn);
  3603. xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
  3604. node_copy_contents(dn, sn, shared_alloc);
  3605. xml_node_struct* dit = dn;
  3606. xml_node_struct* sit = sn->first_child;
  3607. while (sit && sit != sn)
  3608. {
  3609. // loop invariant: dit is inside the subtree rooted at dn
  3610. assert(dit);
  3611. // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
  3612. if (sit != dn)
  3613. {
  3614. xml_node_struct* copy = append_new_node(dit, alloc, PUGI_IMPL_NODETYPE(sit));
  3615. if (copy)
  3616. {
  3617. node_copy_contents(copy, sit, shared_alloc);
  3618. if (sit->first_child)
  3619. {
  3620. dit = copy;
  3621. sit = sit->first_child;
  3622. continue;
  3623. }
  3624. }
  3625. }
  3626. // continue to the next node
  3627. do
  3628. {
  3629. if (sit->next_sibling)
  3630. {
  3631. sit = sit->next_sibling;
  3632. break;
  3633. }
  3634. sit = sit->parent;
  3635. dit = dit->parent;
  3636. // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
  3637. assert(sit == sn || dit);
  3638. }
  3639. while (sit != sn);
  3640. }
  3641. assert(!sit || dit == dn->parent);
  3642. }
  3643. PUGI_IMPL_FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
  3644. {
  3645. xml_allocator& alloc = get_allocator(da);
  3646. xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
  3647. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3648. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3649. }
  3650. inline bool is_text_node(xml_node_struct* node)
  3651. {
  3652. xml_node_type type = PUGI_IMPL_NODETYPE(node);
  3653. return type == node_pcdata || type == node_cdata;
  3654. }
  3655. // get value with conversion functions
  3656. template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
  3657. {
  3658. U result = 0;
  3659. const char_t* s = value;
  3660. while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
  3661. s++;
  3662. bool negative = (*s == '-');
  3663. s += (*s == '+' || *s == '-');
  3664. bool overflow = false;
  3665. if (s[0] == '0' && (s[1] | ' ') == 'x')
  3666. {
  3667. s += 2;
  3668. // since overflow detection relies on length of the sequence skip leading zeros
  3669. while (*s == '0')
  3670. s++;
  3671. const char_t* start = s;
  3672. for (;;)
  3673. {
  3674. if (static_cast<unsigned>(*s - '0') < 10)
  3675. result = result * 16 + (*s - '0');
  3676. else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
  3677. result = result * 16 + ((*s | ' ') - 'a' + 10);
  3678. else
  3679. break;
  3680. s++;
  3681. }
  3682. size_t digits = static_cast<size_t>(s - start);
  3683. overflow = digits > sizeof(U) * 2;
  3684. }
  3685. else
  3686. {
  3687. // since overflow detection relies on length of the sequence skip leading zeros
  3688. while (*s == '0')
  3689. s++;
  3690. const char_t* start = s;
  3691. for (;;)
  3692. {
  3693. if (static_cast<unsigned>(*s - '0') < 10)
  3694. result = result * 10 + (*s - '0');
  3695. else
  3696. break;
  3697. s++;
  3698. }
  3699. size_t digits = static_cast<size_t>(s - start);
  3700. PUGI_IMPL_STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
  3701. const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
  3702. const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
  3703. const size_t high_bit = sizeof(U) * 8 - 1;
  3704. overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
  3705. }
  3706. if (negative)
  3707. {
  3708. // Workaround for crayc++ CC-3059: Expected no overflow in routine.
  3709. #ifdef _CRAYC
  3710. return (overflow || result > ~minv + 1) ? minv : ~result + 1;
  3711. #else
  3712. return (overflow || result > 0 - minv) ? minv : 0 - result;
  3713. #endif
  3714. }
  3715. else
  3716. return (overflow || result > maxv) ? maxv : result;
  3717. }
  3718. PUGI_IMPL_FN int get_value_int(const char_t* value)
  3719. {
  3720. return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
  3721. }
  3722. PUGI_IMPL_FN unsigned int get_value_uint(const char_t* value)
  3723. {
  3724. return string_to_integer<unsigned int>(value, 0, UINT_MAX);
  3725. }
  3726. PUGI_IMPL_FN double get_value_double(const char_t* value)
  3727. {
  3728. #ifdef PUGIXML_WCHAR_MODE
  3729. return wcstod(value, 0);
  3730. #else
  3731. return strtod(value, 0);
  3732. #endif
  3733. }
  3734. PUGI_IMPL_FN float get_value_float(const char_t* value)
  3735. {
  3736. #ifdef PUGIXML_WCHAR_MODE
  3737. return static_cast<float>(wcstod(value, 0));
  3738. #else
  3739. return static_cast<float>(strtod(value, 0));
  3740. #endif
  3741. }
  3742. PUGI_IMPL_FN bool get_value_bool(const char_t* value)
  3743. {
  3744. // only look at first char
  3745. char_t first = *value;
  3746. // 1*, t* (true), T* (True), y* (yes), Y* (YES)
  3747. return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
  3748. }
  3749. #ifdef PUGIXML_HAS_LONG_LONG
  3750. PUGI_IMPL_FN long long get_value_llong(const char_t* value)
  3751. {
  3752. return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
  3753. }
  3754. PUGI_IMPL_FN unsigned long long get_value_ullong(const char_t* value)
  3755. {
  3756. return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
  3757. }
  3758. #endif
  3759. template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
  3760. {
  3761. char_t* result = end - 1;
  3762. U rest = negative ? 0 - value : value;
  3763. do
  3764. {
  3765. *result-- = static_cast<char_t>('0' + (rest % 10));
  3766. rest /= 10;
  3767. }
  3768. while (rest);
  3769. assert(result >= begin);
  3770. (void)begin;
  3771. *result = '-';
  3772. return result + !negative;
  3773. }
  3774. // set value with conversion functions
  3775. template <typename String, typename Header>
  3776. PUGI_IMPL_FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
  3777. {
  3778. #ifdef PUGIXML_WCHAR_MODE
  3779. char_t wbuf[128];
  3780. assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
  3781. size_t offset = 0;
  3782. for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
  3783. return strcpy_insitu(dest, header, header_mask, wbuf, offset);
  3784. #else
  3785. return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
  3786. #endif
  3787. }
  3788. template <typename U, typename String, typename Header>
  3789. PUGI_IMPL_FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
  3790. {
  3791. char_t buf[64];
  3792. char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
  3793. char_t* begin = integer_to_string(buf, end, value, negative);
  3794. return strcpy_insitu(dest, header, header_mask, begin, end - begin);
  3795. }
  3796. template <typename String, typename Header>
  3797. PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
  3798. {
  3799. char buf[128];
  3800. PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, double(value));
  3801. return set_value_ascii(dest, header, header_mask, buf);
  3802. }
  3803. template <typename String, typename Header>
  3804. PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
  3805. {
  3806. char buf[128];
  3807. PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, value);
  3808. return set_value_ascii(dest, header, header_mask, buf);
  3809. }
  3810. template <typename String, typename Header>
  3811. PUGI_IMPL_FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
  3812. {
  3813. return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
  3814. }
  3815. PUGI_IMPL_FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
  3816. {
  3817. // check input buffer
  3818. if (!contents && size) return make_parse_result(status_io_error);
  3819. // get actual encoding
  3820. xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
  3821. // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it
  3822. auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate);
  3823. // get private buffer
  3824. char_t* buffer = 0;
  3825. size_t length = 0;
  3826. // coverity[var_deref_model]
  3827. if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
  3828. // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it
  3829. contents_guard.release();
  3830. // delete original buffer if we performed a conversion
  3831. if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
  3832. // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
  3833. if (own || buffer != contents) *out_buffer = buffer;
  3834. // store buffer for offset_debug
  3835. doc->buffer = buffer;
  3836. // parse
  3837. xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
  3838. // remember encoding
  3839. res.encoding = buffer_encoding;
  3840. return res;
  3841. }
  3842. // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
  3843. PUGI_IMPL_FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
  3844. {
  3845. #if defined(__linux__) || defined(__APPLE__)
  3846. // this simultaneously retrieves the file size and file mode (to guard against loading non-files)
  3847. struct stat st;
  3848. if (fstat(fileno(file), &st) != 0) return status_io_error;
  3849. // anything that's not a regular file doesn't have a coherent length
  3850. if (!S_ISREG(st.st_mode)) return status_io_error;
  3851. typedef off_t length_type;
  3852. length_type length = st.st_size;
  3853. #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
  3854. // there are 64-bit versions of fseek/ftell, let's use them
  3855. typedef __int64 length_type;
  3856. _fseeki64(file, 0, SEEK_END);
  3857. length_type length = _ftelli64(file);
  3858. _fseeki64(file, 0, SEEK_SET);
  3859. #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
  3860. // there are 64-bit versions of fseek/ftell, let's use them
  3861. typedef off64_t length_type;
  3862. fseeko64(file, 0, SEEK_END);
  3863. length_type length = ftello64(file);
  3864. fseeko64(file, 0, SEEK_SET);
  3865. #else
  3866. // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
  3867. typedef long length_type;
  3868. fseek(file, 0, SEEK_END);
  3869. length_type length = ftell(file);
  3870. fseek(file, 0, SEEK_SET);
  3871. #endif
  3872. // check for I/O errors
  3873. if (length < 0) return status_io_error;
  3874. // check for overflow
  3875. size_t result = static_cast<size_t>(length);
  3876. if (static_cast<length_type>(result) != length) return status_out_of_memory;
  3877. // finalize
  3878. out_result = result;
  3879. return status_ok;
  3880. }
  3881. // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
  3882. PUGI_IMPL_FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
  3883. {
  3884. // We only need to zero-terminate if encoding conversion does not do it for us
  3885. #ifdef PUGIXML_WCHAR_MODE
  3886. xml_encoding wchar_encoding = get_wchar_encoding();
  3887. if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
  3888. {
  3889. size_t length = size / sizeof(char_t);
  3890. static_cast<char_t*>(buffer)[length] = 0;
  3891. return (length + 1) * sizeof(char_t);
  3892. }
  3893. #else
  3894. if (encoding == encoding_utf8)
  3895. {
  3896. static_cast<char*>(buffer)[size] = 0;
  3897. return size + 1;
  3898. }
  3899. #endif
  3900. return size;
  3901. }
  3902. PUGI_IMPL_FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3903. {
  3904. if (!file) return make_parse_result(status_file_not_found);
  3905. // get file size (can result in I/O errors)
  3906. size_t size = 0;
  3907. xml_parse_status size_status = get_file_size(file, size);
  3908. if (size_status != status_ok) return make_parse_result(size_status);
  3909. size_t max_suffix_size = sizeof(char_t);
  3910. // allocate buffer for the whole file
  3911. char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
  3912. if (!contents) return make_parse_result(status_out_of_memory);
  3913. // read file in memory
  3914. size_t read_size = fread(contents, 1, size, file);
  3915. if (read_size != size)
  3916. {
  3917. xml_memory::deallocate(contents);
  3918. return make_parse_result(status_io_error);
  3919. }
  3920. xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
  3921. return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3922. }
  3923. PUGI_IMPL_FN void close_file(FILE* file)
  3924. {
  3925. fclose(file);
  3926. }
  3927. #ifndef PUGIXML_NO_STL
  3928. template <typename T> struct xml_stream_chunk
  3929. {
  3930. static xml_stream_chunk* create()
  3931. {
  3932. void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
  3933. if (!memory) return 0;
  3934. return new (memory) xml_stream_chunk();
  3935. }
  3936. static void destroy(xml_stream_chunk* chunk)
  3937. {
  3938. // free chunk chain
  3939. while (chunk)
  3940. {
  3941. xml_stream_chunk* next_ = chunk->next;
  3942. xml_memory::deallocate(chunk);
  3943. chunk = next_;
  3944. }
  3945. }
  3946. xml_stream_chunk(): next(0), size(0)
  3947. {
  3948. }
  3949. xml_stream_chunk* next;
  3950. size_t size;
  3951. T data[xml_memory_page_size / sizeof(T)];
  3952. };
  3953. template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3954. {
  3955. auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
  3956. // read file to a chunk list
  3957. size_t total = 0;
  3958. xml_stream_chunk<T>* last = 0;
  3959. while (!stream.eof())
  3960. {
  3961. // allocate new chunk
  3962. xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
  3963. if (!chunk) return status_out_of_memory;
  3964. // append chunk to list
  3965. if (last) last = last->next = chunk;
  3966. else chunks.data = last = chunk;
  3967. // read data to chunk
  3968. stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
  3969. chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
  3970. // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
  3971. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3972. // guard against huge files (chunk size is small enough to make this overflow check work)
  3973. if (total + chunk->size < total) return status_out_of_memory;
  3974. total += chunk->size;
  3975. }
  3976. size_t max_suffix_size = sizeof(char_t);
  3977. // copy chunk list to a contiguous buffer
  3978. char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
  3979. if (!buffer) return status_out_of_memory;
  3980. char* write = buffer;
  3981. for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
  3982. {
  3983. assert(write + chunk->size <= buffer + total);
  3984. memcpy(write, chunk->data, chunk->size);
  3985. write += chunk->size;
  3986. }
  3987. assert(write == buffer + total);
  3988. // return buffer
  3989. *out_buffer = buffer;
  3990. *out_size = total;
  3991. return status_ok;
  3992. }
  3993. template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3994. {
  3995. // get length of remaining data in stream
  3996. typename std::basic_istream<T>::pos_type pos = stream.tellg();
  3997. stream.seekg(0, std::ios::end);
  3998. std::streamoff length = stream.tellg() - pos;
  3999. stream.seekg(pos);
  4000. if (stream.fail() || pos < 0) return status_io_error;
  4001. // guard against huge files
  4002. size_t read_length = static_cast<size_t>(length);
  4003. if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
  4004. size_t max_suffix_size = sizeof(char_t);
  4005. // read stream data into memory (guard against stream exceptions with buffer holder)
  4006. auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
  4007. if (!buffer.data) return status_out_of_memory;
  4008. stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
  4009. // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
  4010. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  4011. // return buffer
  4012. size_t actual_length = static_cast<size_t>(stream.gcount());
  4013. assert(actual_length <= read_length);
  4014. *out_buffer = buffer.release();
  4015. *out_size = actual_length * sizeof(T);
  4016. return status_ok;
  4017. }
  4018. template <typename T> PUGI_IMPL_FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  4019. {
  4020. void* buffer = 0;
  4021. size_t size = 0;
  4022. xml_parse_status status = status_ok;
  4023. // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
  4024. if (stream.fail()) return make_parse_result(status_io_error);
  4025. // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
  4026. if (stream.tellg() < 0)
  4027. {
  4028. stream.clear(); // clear error flags that could be set by a failing tellg
  4029. status = load_stream_data_noseek(stream, &buffer, &size);
  4030. }
  4031. else
  4032. status = load_stream_data_seek(stream, &buffer, &size);
  4033. if (status != status_ok) return make_parse_result(status);
  4034. xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
  4035. return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
  4036. }
  4037. #endif
  4038. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
  4039. PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4040. {
  4041. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
  4042. FILE* file = 0;
  4043. return _wfopen_s(&file, path, mode) == 0 ? file : 0;
  4044. #else
  4045. return _wfopen(path, mode);
  4046. #endif
  4047. }
  4048. #else
  4049. PUGI_IMPL_FN char* convert_path_heap(const wchar_t* str)
  4050. {
  4051. assert(str);
  4052. // first pass: get length in utf8 characters
  4053. size_t length = strlength_wide(str);
  4054. size_t size = as_utf8_begin(str, length);
  4055. // allocate resulting string
  4056. char* result = static_cast<char*>(xml_memory::allocate(size + 1));
  4057. if (!result) return 0;
  4058. // second pass: convert to utf8
  4059. as_utf8_end(result, size, str, length);
  4060. // zero-terminate
  4061. result[size] = 0;
  4062. return result;
  4063. }
  4064. PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4065. {
  4066. // there is no standard function to open wide paths, so our best bet is to try utf8 path
  4067. char* path_utf8 = convert_path_heap(path);
  4068. if (!path_utf8) return 0;
  4069. // convert mode to ASCII (we mirror _wfopen interface)
  4070. char mode_ascii[4] = {0};
  4071. for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
  4072. // try to open the utf8 path
  4073. FILE* result = fopen(path_utf8, mode_ascii);
  4074. // free dummy buffer
  4075. xml_memory::deallocate(path_utf8);
  4076. return result;
  4077. }
  4078. #endif
  4079. PUGI_IMPL_FN FILE* open_file(const char* path, const char* mode)
  4080. {
  4081. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
  4082. FILE* file = 0;
  4083. return fopen_s(&file, path, mode) == 0 ? file : 0;
  4084. #else
  4085. return fopen(path, mode);
  4086. #endif
  4087. }
  4088. PUGI_IMPL_FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
  4089. {
  4090. if (!file) return false;
  4091. xml_writer_file writer(file);
  4092. doc.save(writer, indent, flags, encoding);
  4093. return fflush(file) == 0 && ferror(file) == 0;
  4094. }
  4095. struct name_null_sentry
  4096. {
  4097. xml_node_struct* node;
  4098. char_t* name;
  4099. name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
  4100. {
  4101. node->name = 0;
  4102. }
  4103. ~name_null_sentry()
  4104. {
  4105. node->name = name;
  4106. }
  4107. };
  4108. PUGI_IMPL_NS_END
  4109. namespace pugi
  4110. {
  4111. PUGI_IMPL_FN xml_writer::~xml_writer()
  4112. {
  4113. }
  4114. PUGI_IMPL_FN xml_writer_file::xml_writer_file(void* file_): file(file_)
  4115. {
  4116. }
  4117. PUGI_IMPL_FN void xml_writer_file::write(const void* data, size_t size)
  4118. {
  4119. size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
  4120. (void)!result; // unfortunately we can't do proper error handling here
  4121. }
  4122. #ifndef PUGIXML_NO_STL
  4123. PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
  4124. {
  4125. }
  4126. PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
  4127. {
  4128. }
  4129. PUGI_IMPL_FN void xml_writer_stream::write(const void* data, size_t size)
  4130. {
  4131. if (narrow_stream)
  4132. {
  4133. assert(!wide_stream);
  4134. narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
  4135. }
  4136. else
  4137. {
  4138. assert(wide_stream);
  4139. assert(size % sizeof(wchar_t) == 0);
  4140. wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
  4141. }
  4142. }
  4143. #endif
  4144. PUGI_IMPL_FN xml_tree_walker::xml_tree_walker(): _depth(0)
  4145. {
  4146. }
  4147. PUGI_IMPL_FN xml_tree_walker::~xml_tree_walker()
  4148. {
  4149. }
  4150. PUGI_IMPL_FN int xml_tree_walker::depth() const
  4151. {
  4152. return _depth;
  4153. }
  4154. PUGI_IMPL_FN bool xml_tree_walker::begin(xml_node&)
  4155. {
  4156. return true;
  4157. }
  4158. PUGI_IMPL_FN bool xml_tree_walker::end(xml_node&)
  4159. {
  4160. return true;
  4161. }
  4162. PUGI_IMPL_FN xml_attribute::xml_attribute(): _attr(0)
  4163. {
  4164. }
  4165. PUGI_IMPL_FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
  4166. {
  4167. }
  4168. PUGI_IMPL_FN static void unspecified_bool_xml_attribute(xml_attribute***)
  4169. {
  4170. }
  4171. PUGI_IMPL_FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
  4172. {
  4173. return _attr ? unspecified_bool_xml_attribute : 0;
  4174. }
  4175. PUGI_IMPL_FN bool xml_attribute::operator!() const
  4176. {
  4177. return !_attr;
  4178. }
  4179. PUGI_IMPL_FN bool xml_attribute::operator==(const xml_attribute& r) const
  4180. {
  4181. return (_attr == r._attr);
  4182. }
  4183. PUGI_IMPL_FN bool xml_attribute::operator!=(const xml_attribute& r) const
  4184. {
  4185. return (_attr != r._attr);
  4186. }
  4187. PUGI_IMPL_FN bool xml_attribute::operator<(const xml_attribute& r) const
  4188. {
  4189. return (_attr < r._attr);
  4190. }
  4191. PUGI_IMPL_FN bool xml_attribute::operator>(const xml_attribute& r) const
  4192. {
  4193. return (_attr > r._attr);
  4194. }
  4195. PUGI_IMPL_FN bool xml_attribute::operator<=(const xml_attribute& r) const
  4196. {
  4197. return (_attr <= r._attr);
  4198. }
  4199. PUGI_IMPL_FN bool xml_attribute::operator>=(const xml_attribute& r) const
  4200. {
  4201. return (_attr >= r._attr);
  4202. }
  4203. PUGI_IMPL_FN xml_attribute xml_attribute::next_attribute() const
  4204. {
  4205. if (!_attr) return xml_attribute();
  4206. return xml_attribute(_attr->next_attribute);
  4207. }
  4208. PUGI_IMPL_FN xml_attribute xml_attribute::previous_attribute() const
  4209. {
  4210. if (!_attr) return xml_attribute();
  4211. xml_attribute_struct* prev = _attr->prev_attribute_c;
  4212. return prev->next_attribute ? xml_attribute(prev) : xml_attribute();
  4213. }
  4214. PUGI_IMPL_FN const char_t* xml_attribute::as_string(const char_t* def) const
  4215. {
  4216. if (!_attr) return def;
  4217. const char_t* value = _attr->value;
  4218. return value ? value : def;
  4219. }
  4220. PUGI_IMPL_FN int xml_attribute::as_int(int def) const
  4221. {
  4222. if (!_attr) return def;
  4223. const char_t* value = _attr->value;
  4224. return value ? impl::get_value_int(value) : def;
  4225. }
  4226. PUGI_IMPL_FN unsigned int xml_attribute::as_uint(unsigned int def) const
  4227. {
  4228. if (!_attr) return def;
  4229. const char_t* value = _attr->value;
  4230. return value ? impl::get_value_uint(value) : def;
  4231. }
  4232. PUGI_IMPL_FN double xml_attribute::as_double(double def) const
  4233. {
  4234. if (!_attr) return def;
  4235. const char_t* value = _attr->value;
  4236. return value ? impl::get_value_double(value) : def;
  4237. }
  4238. PUGI_IMPL_FN float xml_attribute::as_float(float def) const
  4239. {
  4240. if (!_attr) return def;
  4241. const char_t* value = _attr->value;
  4242. return value ? impl::get_value_float(value) : def;
  4243. }
  4244. PUGI_IMPL_FN bool xml_attribute::as_bool(bool def) const
  4245. {
  4246. if (!_attr) return def;
  4247. const char_t* value = _attr->value;
  4248. return value ? impl::get_value_bool(value) : def;
  4249. }
  4250. #ifdef PUGIXML_HAS_LONG_LONG
  4251. PUGI_IMPL_FN long long xml_attribute::as_llong(long long def) const
  4252. {
  4253. if (!_attr) return def;
  4254. const char_t* value = _attr->value;
  4255. return value ? impl::get_value_llong(value) : def;
  4256. }
  4257. PUGI_IMPL_FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
  4258. {
  4259. if (!_attr) return def;
  4260. const char_t* value = _attr->value;
  4261. return value ? impl::get_value_ullong(value) : def;
  4262. }
  4263. #endif
  4264. PUGI_IMPL_FN bool xml_attribute::empty() const
  4265. {
  4266. return !_attr;
  4267. }
  4268. PUGI_IMPL_FN const char_t* xml_attribute::name() const
  4269. {
  4270. if (!_attr) return PUGIXML_TEXT("");
  4271. const char_t* name = _attr->name;
  4272. return name ? name : PUGIXML_TEXT("");
  4273. }
  4274. PUGI_IMPL_FN const char_t* xml_attribute::value() const
  4275. {
  4276. if (!_attr) return PUGIXML_TEXT("");
  4277. const char_t* value = _attr->value;
  4278. return value ? value : PUGIXML_TEXT("");
  4279. }
  4280. PUGI_IMPL_FN size_t xml_attribute::hash_value() const
  4281. {
  4282. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
  4283. }
  4284. PUGI_IMPL_FN xml_attribute_struct* xml_attribute::internal_object() const
  4285. {
  4286. return _attr;
  4287. }
  4288. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
  4289. {
  4290. set_value(rhs);
  4291. return *this;
  4292. }
  4293. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(int rhs)
  4294. {
  4295. set_value(rhs);
  4296. return *this;
  4297. }
  4298. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
  4299. {
  4300. set_value(rhs);
  4301. return *this;
  4302. }
  4303. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long rhs)
  4304. {
  4305. set_value(rhs);
  4306. return *this;
  4307. }
  4308. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
  4309. {
  4310. set_value(rhs);
  4311. return *this;
  4312. }
  4313. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(double rhs)
  4314. {
  4315. set_value(rhs);
  4316. return *this;
  4317. }
  4318. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(float rhs)
  4319. {
  4320. set_value(rhs);
  4321. return *this;
  4322. }
  4323. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(bool rhs)
  4324. {
  4325. set_value(rhs);
  4326. return *this;
  4327. }
  4328. #ifdef PUGIXML_HAS_LONG_LONG
  4329. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long long rhs)
  4330. {
  4331. set_value(rhs);
  4332. return *this;
  4333. }
  4334. PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
  4335. {
  4336. set_value(rhs);
  4337. return *this;
  4338. }
  4339. #endif
  4340. PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs)
  4341. {
  4342. if (!_attr) return false;
  4343. return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4344. }
  4345. PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs, size_t size)
  4346. {
  4347. if (!_attr) return false;
  4348. return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, size);
  4349. }
  4350. PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs)
  4351. {
  4352. if (!_attr) return false;
  4353. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4354. }
  4355. PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs, size_t size)
  4356. {
  4357. if (!_attr) return false;
  4358. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, size);
  4359. }
  4360. PUGI_IMPL_FN bool xml_attribute::set_value(int rhs)
  4361. {
  4362. if (!_attr) return false;
  4363. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4364. }
  4365. PUGI_IMPL_FN bool xml_attribute::set_value(unsigned int rhs)
  4366. {
  4367. if (!_attr) return false;
  4368. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4369. }
  4370. PUGI_IMPL_FN bool xml_attribute::set_value(long rhs)
  4371. {
  4372. if (!_attr) return false;
  4373. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4374. }
  4375. PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long rhs)
  4376. {
  4377. if (!_attr) return false;
  4378. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4379. }
  4380. PUGI_IMPL_FN bool xml_attribute::set_value(double rhs)
  4381. {
  4382. if (!_attr) return false;
  4383. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
  4384. }
  4385. PUGI_IMPL_FN bool xml_attribute::set_value(double rhs, int precision)
  4386. {
  4387. if (!_attr) return false;
  4388. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4389. }
  4390. PUGI_IMPL_FN bool xml_attribute::set_value(float rhs)
  4391. {
  4392. if (!_attr) return false;
  4393. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
  4394. }
  4395. PUGI_IMPL_FN bool xml_attribute::set_value(float rhs, int precision)
  4396. {
  4397. if (!_attr) return false;
  4398. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4399. }
  4400. PUGI_IMPL_FN bool xml_attribute::set_value(bool rhs)
  4401. {
  4402. if (!_attr) return false;
  4403. return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
  4404. }
  4405. #ifdef PUGIXML_HAS_LONG_LONG
  4406. PUGI_IMPL_FN bool xml_attribute::set_value(long long rhs)
  4407. {
  4408. if (!_attr) return false;
  4409. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4410. }
  4411. PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long long rhs)
  4412. {
  4413. if (!_attr) return false;
  4414. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4415. }
  4416. #endif
  4417. #ifdef __BORLANDC__
  4418. PUGI_IMPL_FN bool operator&&(const xml_attribute& lhs, bool rhs)
  4419. {
  4420. return (bool)lhs && rhs;
  4421. }
  4422. PUGI_IMPL_FN bool operator||(const xml_attribute& lhs, bool rhs)
  4423. {
  4424. return (bool)lhs || rhs;
  4425. }
  4426. #endif
  4427. PUGI_IMPL_FN xml_node::xml_node(): _root(0)
  4428. {
  4429. }
  4430. PUGI_IMPL_FN xml_node::xml_node(xml_node_struct* p): _root(p)
  4431. {
  4432. }
  4433. PUGI_IMPL_FN static void unspecified_bool_xml_node(xml_node***)
  4434. {
  4435. }
  4436. PUGI_IMPL_FN xml_node::operator xml_node::unspecified_bool_type() const
  4437. {
  4438. return _root ? unspecified_bool_xml_node : 0;
  4439. }
  4440. PUGI_IMPL_FN bool xml_node::operator!() const
  4441. {
  4442. return !_root;
  4443. }
  4444. PUGI_IMPL_FN xml_node::iterator xml_node::begin() const
  4445. {
  4446. return iterator(_root ? _root->first_child + 0 : 0, _root);
  4447. }
  4448. PUGI_IMPL_FN xml_node::iterator xml_node::end() const
  4449. {
  4450. return iterator(0, _root);
  4451. }
  4452. PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_begin() const
  4453. {
  4454. return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
  4455. }
  4456. PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_end() const
  4457. {
  4458. return attribute_iterator(0, _root);
  4459. }
  4460. PUGI_IMPL_FN xml_object_range<xml_node_iterator> xml_node::children() const
  4461. {
  4462. return xml_object_range<xml_node_iterator>(begin(), end());
  4463. }
  4464. PUGI_IMPL_FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
  4465. {
  4466. return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
  4467. }
  4468. PUGI_IMPL_FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
  4469. {
  4470. return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
  4471. }
  4472. PUGI_IMPL_FN bool xml_node::operator==(const xml_node& r) const
  4473. {
  4474. return (_root == r._root);
  4475. }
  4476. PUGI_IMPL_FN bool xml_node::operator!=(const xml_node& r) const
  4477. {
  4478. return (_root != r._root);
  4479. }
  4480. PUGI_IMPL_FN bool xml_node::operator<(const xml_node& r) const
  4481. {
  4482. return (_root < r._root);
  4483. }
  4484. PUGI_IMPL_FN bool xml_node::operator>(const xml_node& r) const
  4485. {
  4486. return (_root > r._root);
  4487. }
  4488. PUGI_IMPL_FN bool xml_node::operator<=(const xml_node& r) const
  4489. {
  4490. return (_root <= r._root);
  4491. }
  4492. PUGI_IMPL_FN bool xml_node::operator>=(const xml_node& r) const
  4493. {
  4494. return (_root >= r._root);
  4495. }
  4496. PUGI_IMPL_FN bool xml_node::empty() const
  4497. {
  4498. return !_root;
  4499. }
  4500. PUGI_IMPL_FN const char_t* xml_node::name() const
  4501. {
  4502. if (!_root) return PUGIXML_TEXT("");
  4503. const char_t* name = _root->name;
  4504. return name ? name : PUGIXML_TEXT("");
  4505. }
  4506. PUGI_IMPL_FN xml_node_type xml_node::type() const
  4507. {
  4508. return _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
  4509. }
  4510. PUGI_IMPL_FN const char_t* xml_node::value() const
  4511. {
  4512. if (!_root) return PUGIXML_TEXT("");
  4513. const char_t* value = _root->value;
  4514. return value ? value : PUGIXML_TEXT("");
  4515. }
  4516. PUGI_IMPL_FN xml_node xml_node::child(const char_t* name_) const
  4517. {
  4518. if (!_root) return xml_node();
  4519. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4520. {
  4521. const char_t* iname = i->name;
  4522. if (iname && impl::strequal(name_, iname))
  4523. return xml_node(i);
  4524. }
  4525. return xml_node();
  4526. }
  4527. PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_) const
  4528. {
  4529. if (!_root) return xml_attribute();
  4530. for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
  4531. {
  4532. const char_t* iname = i->name;
  4533. if (iname && impl::strequal(name_, iname))
  4534. return xml_attribute(i);
  4535. }
  4536. return xml_attribute();
  4537. }
  4538. PUGI_IMPL_FN xml_node xml_node::next_sibling(const char_t* name_) const
  4539. {
  4540. if (!_root) return xml_node();
  4541. for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
  4542. {
  4543. const char_t* iname = i->name;
  4544. if (iname && impl::strequal(name_, iname))
  4545. return xml_node(i);
  4546. }
  4547. return xml_node();
  4548. }
  4549. PUGI_IMPL_FN xml_node xml_node::next_sibling() const
  4550. {
  4551. return _root ? xml_node(_root->next_sibling) : xml_node();
  4552. }
  4553. PUGI_IMPL_FN xml_node xml_node::previous_sibling(const char_t* name_) const
  4554. {
  4555. if (!_root) return xml_node();
  4556. for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
  4557. {
  4558. const char_t* iname = i->name;
  4559. if (iname && impl::strequal(name_, iname))
  4560. return xml_node(i);
  4561. }
  4562. return xml_node();
  4563. }
  4564. PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
  4565. {
  4566. xml_attribute_struct* hint = hint_._attr;
  4567. // if hint is not an attribute of node, behavior is not defined
  4568. assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
  4569. if (!_root) return xml_attribute();
  4570. // optimistically search from hint up until the end
  4571. for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
  4572. {
  4573. const char_t* iname = i->name;
  4574. if (iname && impl::strequal(name_, iname))
  4575. {
  4576. // update hint to maximize efficiency of searching for consecutive attributes
  4577. hint_._attr = i->next_attribute;
  4578. return xml_attribute(i);
  4579. }
  4580. }
  4581. // wrap around and search from the first attribute until the hint
  4582. // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
  4583. for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
  4584. {
  4585. const char_t* jname = j->name;
  4586. if (jname && impl::strequal(name_, jname))
  4587. {
  4588. // update hint to maximize efficiency of searching for consecutive attributes
  4589. hint_._attr = j->next_attribute;
  4590. return xml_attribute(j);
  4591. }
  4592. }
  4593. return xml_attribute();
  4594. }
  4595. PUGI_IMPL_FN xml_node xml_node::previous_sibling() const
  4596. {
  4597. if (!_root) return xml_node();
  4598. xml_node_struct* prev = _root->prev_sibling_c;
  4599. return prev->next_sibling ? xml_node(prev) : xml_node();
  4600. }
  4601. PUGI_IMPL_FN xml_node xml_node::parent() const
  4602. {
  4603. return _root ? xml_node(_root->parent) : xml_node();
  4604. }
  4605. PUGI_IMPL_FN xml_node xml_node::root() const
  4606. {
  4607. return _root ? xml_node(&impl::get_document(_root)) : xml_node();
  4608. }
  4609. PUGI_IMPL_FN xml_text xml_node::text() const
  4610. {
  4611. return xml_text(_root);
  4612. }
  4613. PUGI_IMPL_FN const char_t* xml_node::child_value() const
  4614. {
  4615. if (!_root) return PUGIXML_TEXT("");
  4616. // element nodes can have value if parse_embed_pcdata was used
  4617. if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value)
  4618. return _root->value;
  4619. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4620. {
  4621. const char_t* ivalue = i->value;
  4622. if (impl::is_text_node(i) && ivalue)
  4623. return ivalue;
  4624. }
  4625. return PUGIXML_TEXT("");
  4626. }
  4627. PUGI_IMPL_FN const char_t* xml_node::child_value(const char_t* name_) const
  4628. {
  4629. return child(name_).child_value();
  4630. }
  4631. PUGI_IMPL_FN xml_attribute xml_node::first_attribute() const
  4632. {
  4633. if (!_root) return xml_attribute();
  4634. return xml_attribute(_root->first_attribute);
  4635. }
  4636. PUGI_IMPL_FN xml_attribute xml_node::last_attribute() const
  4637. {
  4638. if (!_root) return xml_attribute();
  4639. xml_attribute_struct* first = _root->first_attribute;
  4640. return first ? xml_attribute(first->prev_attribute_c) : xml_attribute();
  4641. }
  4642. PUGI_IMPL_FN xml_node xml_node::first_child() const
  4643. {
  4644. if (!_root) return xml_node();
  4645. return xml_node(_root->first_child);
  4646. }
  4647. PUGI_IMPL_FN xml_node xml_node::last_child() const
  4648. {
  4649. if (!_root) return xml_node();
  4650. xml_node_struct* first = _root->first_child;
  4651. return first ? xml_node(first->prev_sibling_c) : xml_node();
  4652. }
  4653. PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs)
  4654. {
  4655. xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
  4656. if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
  4657. return false;
  4658. return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4659. }
  4660. PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs, size_t size)
  4661. {
  4662. xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
  4663. if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
  4664. return false;
  4665. return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, size);
  4666. }
  4667. PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs)
  4668. {
  4669. xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
  4670. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4671. return false;
  4672. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4673. }
  4674. PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs, size_t size)
  4675. {
  4676. xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
  4677. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4678. return false;
  4679. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, size);
  4680. }
  4681. PUGI_IMPL_FN xml_attribute xml_node::append_attribute(const char_t* name_)
  4682. {
  4683. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4684. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4685. if (!alloc.reserve()) return xml_attribute();
  4686. xml_attribute a(impl::allocate_attribute(alloc));
  4687. if (!a) return xml_attribute();
  4688. impl::append_attribute(a._attr, _root);
  4689. a.set_name(name_);
  4690. return a;
  4691. }
  4692. PUGI_IMPL_FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
  4693. {
  4694. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4695. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4696. if (!alloc.reserve()) return xml_attribute();
  4697. xml_attribute a(impl::allocate_attribute(alloc));
  4698. if (!a) return xml_attribute();
  4699. impl::prepend_attribute(a._attr, _root);
  4700. a.set_name(name_);
  4701. return a;
  4702. }
  4703. PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
  4704. {
  4705. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4706. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4707. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4708. if (!alloc.reserve()) return xml_attribute();
  4709. xml_attribute a(impl::allocate_attribute(alloc));
  4710. if (!a) return xml_attribute();
  4711. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4712. a.set_name(name_);
  4713. return a;
  4714. }
  4715. PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
  4716. {
  4717. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4718. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4719. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4720. if (!alloc.reserve()) return xml_attribute();
  4721. xml_attribute a(impl::allocate_attribute(alloc));
  4722. if (!a) return xml_attribute();
  4723. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4724. a.set_name(name_);
  4725. return a;
  4726. }
  4727. PUGI_IMPL_FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
  4728. {
  4729. if (!proto) return xml_attribute();
  4730. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4731. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4732. if (!alloc.reserve()) return xml_attribute();
  4733. xml_attribute a(impl::allocate_attribute(alloc));
  4734. if (!a) return xml_attribute();
  4735. impl::append_attribute(a._attr, _root);
  4736. impl::node_copy_attribute(a._attr, proto._attr);
  4737. return a;
  4738. }
  4739. PUGI_IMPL_FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
  4740. {
  4741. if (!proto) return xml_attribute();
  4742. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4743. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4744. if (!alloc.reserve()) return xml_attribute();
  4745. xml_attribute a(impl::allocate_attribute(alloc));
  4746. if (!a) return xml_attribute();
  4747. impl::prepend_attribute(a._attr, _root);
  4748. impl::node_copy_attribute(a._attr, proto._attr);
  4749. return a;
  4750. }
  4751. PUGI_IMPL_FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
  4752. {
  4753. if (!proto) return xml_attribute();
  4754. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4755. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4756. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4757. if (!alloc.reserve()) return xml_attribute();
  4758. xml_attribute a(impl::allocate_attribute(alloc));
  4759. if (!a) return xml_attribute();
  4760. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4761. impl::node_copy_attribute(a._attr, proto._attr);
  4762. return a;
  4763. }
  4764. PUGI_IMPL_FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
  4765. {
  4766. if (!proto) return xml_attribute();
  4767. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4768. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4769. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4770. if (!alloc.reserve()) return xml_attribute();
  4771. xml_attribute a(impl::allocate_attribute(alloc));
  4772. if (!a) return xml_attribute();
  4773. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4774. impl::node_copy_attribute(a._attr, proto._attr);
  4775. return a;
  4776. }
  4777. PUGI_IMPL_FN xml_node xml_node::append_child(xml_node_type type_)
  4778. {
  4779. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4780. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4781. if (!alloc.reserve()) return xml_node();
  4782. xml_node n(impl::allocate_node(alloc, type_));
  4783. if (!n) return xml_node();
  4784. impl::append_node(n._root, _root);
  4785. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4786. return n;
  4787. }
  4788. PUGI_IMPL_FN xml_node xml_node::prepend_child(xml_node_type type_)
  4789. {
  4790. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4791. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4792. if (!alloc.reserve()) return xml_node();
  4793. xml_node n(impl::allocate_node(alloc, type_));
  4794. if (!n) return xml_node();
  4795. impl::prepend_node(n._root, _root);
  4796. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4797. return n;
  4798. }
  4799. PUGI_IMPL_FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
  4800. {
  4801. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4802. if (!node._root || node._root->parent != _root) return xml_node();
  4803. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4804. if (!alloc.reserve()) return xml_node();
  4805. xml_node n(impl::allocate_node(alloc, type_));
  4806. if (!n) return xml_node();
  4807. impl::insert_node_before(n._root, node._root);
  4808. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4809. return n;
  4810. }
  4811. PUGI_IMPL_FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
  4812. {
  4813. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4814. if (!node._root || node._root->parent != _root) return xml_node();
  4815. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4816. if (!alloc.reserve()) return xml_node();
  4817. xml_node n(impl::allocate_node(alloc, type_));
  4818. if (!n) return xml_node();
  4819. impl::insert_node_after(n._root, node._root);
  4820. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4821. return n;
  4822. }
  4823. PUGI_IMPL_FN xml_node xml_node::append_child(const char_t* name_)
  4824. {
  4825. xml_node result = append_child(node_element);
  4826. result.set_name(name_);
  4827. return result;
  4828. }
  4829. PUGI_IMPL_FN xml_node xml_node::prepend_child(const char_t* name_)
  4830. {
  4831. xml_node result = prepend_child(node_element);
  4832. result.set_name(name_);
  4833. return result;
  4834. }
  4835. PUGI_IMPL_FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
  4836. {
  4837. xml_node result = insert_child_after(node_element, node);
  4838. result.set_name(name_);
  4839. return result;
  4840. }
  4841. PUGI_IMPL_FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
  4842. {
  4843. xml_node result = insert_child_before(node_element, node);
  4844. result.set_name(name_);
  4845. return result;
  4846. }
  4847. PUGI_IMPL_FN xml_node xml_node::append_copy(const xml_node& proto)
  4848. {
  4849. xml_node_type type_ = proto.type();
  4850. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4851. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4852. if (!alloc.reserve()) return xml_node();
  4853. xml_node n(impl::allocate_node(alloc, type_));
  4854. if (!n) return xml_node();
  4855. impl::append_node(n._root, _root);
  4856. impl::node_copy_tree(n._root, proto._root);
  4857. return n;
  4858. }
  4859. PUGI_IMPL_FN xml_node xml_node::prepend_copy(const xml_node& proto)
  4860. {
  4861. xml_node_type type_ = proto.type();
  4862. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4863. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4864. if (!alloc.reserve()) return xml_node();
  4865. xml_node n(impl::allocate_node(alloc, type_));
  4866. if (!n) return xml_node();
  4867. impl::prepend_node(n._root, _root);
  4868. impl::node_copy_tree(n._root, proto._root);
  4869. return n;
  4870. }
  4871. PUGI_IMPL_FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
  4872. {
  4873. xml_node_type type_ = proto.type();
  4874. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4875. if (!node._root || node._root->parent != _root) return xml_node();
  4876. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4877. if (!alloc.reserve()) return xml_node();
  4878. xml_node n(impl::allocate_node(alloc, type_));
  4879. if (!n) return xml_node();
  4880. impl::insert_node_after(n._root, node._root);
  4881. impl::node_copy_tree(n._root, proto._root);
  4882. return n;
  4883. }
  4884. PUGI_IMPL_FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
  4885. {
  4886. xml_node_type type_ = proto.type();
  4887. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4888. if (!node._root || node._root->parent != _root) return xml_node();
  4889. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4890. if (!alloc.reserve()) return xml_node();
  4891. xml_node n(impl::allocate_node(alloc, type_));
  4892. if (!n) return xml_node();
  4893. impl::insert_node_before(n._root, node._root);
  4894. impl::node_copy_tree(n._root, proto._root);
  4895. return n;
  4896. }
  4897. PUGI_IMPL_FN xml_node xml_node::append_move(const xml_node& moved)
  4898. {
  4899. if (!impl::allow_move(*this, moved)) return xml_node();
  4900. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4901. if (!alloc.reserve()) return xml_node();
  4902. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4903. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4904. impl::remove_node(moved._root);
  4905. impl::append_node(moved._root, _root);
  4906. return moved;
  4907. }
  4908. PUGI_IMPL_FN xml_node xml_node::prepend_move(const xml_node& moved)
  4909. {
  4910. if (!impl::allow_move(*this, moved)) return xml_node();
  4911. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4912. if (!alloc.reserve()) return xml_node();
  4913. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4914. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4915. impl::remove_node(moved._root);
  4916. impl::prepend_node(moved._root, _root);
  4917. return moved;
  4918. }
  4919. PUGI_IMPL_FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
  4920. {
  4921. if (!impl::allow_move(*this, moved)) return xml_node();
  4922. if (!node._root || node._root->parent != _root) return xml_node();
  4923. if (moved._root == node._root) return xml_node();
  4924. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4925. if (!alloc.reserve()) return xml_node();
  4926. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4927. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4928. impl::remove_node(moved._root);
  4929. impl::insert_node_after(moved._root, node._root);
  4930. return moved;
  4931. }
  4932. PUGI_IMPL_FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
  4933. {
  4934. if (!impl::allow_move(*this, moved)) return xml_node();
  4935. if (!node._root || node._root->parent != _root) return xml_node();
  4936. if (moved._root == node._root) return xml_node();
  4937. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4938. if (!alloc.reserve()) return xml_node();
  4939. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4940. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4941. impl::remove_node(moved._root);
  4942. impl::insert_node_before(moved._root, node._root);
  4943. return moved;
  4944. }
  4945. PUGI_IMPL_FN bool xml_node::remove_attribute(const char_t* name_)
  4946. {
  4947. return remove_attribute(attribute(name_));
  4948. }
  4949. PUGI_IMPL_FN bool xml_node::remove_attribute(const xml_attribute& a)
  4950. {
  4951. if (!_root || !a._attr) return false;
  4952. if (!impl::is_attribute_of(a._attr, _root)) return false;
  4953. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4954. if (!alloc.reserve()) return false;
  4955. impl::remove_attribute(a._attr, _root);
  4956. impl::destroy_attribute(a._attr, alloc);
  4957. return true;
  4958. }
  4959. PUGI_IMPL_FN bool xml_node::remove_attributes()
  4960. {
  4961. if (!_root) return false;
  4962. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4963. if (!alloc.reserve()) return false;
  4964. for (xml_attribute_struct* attr = _root->first_attribute; attr; )
  4965. {
  4966. xml_attribute_struct* next = attr->next_attribute;
  4967. impl::destroy_attribute(attr, alloc);
  4968. attr = next;
  4969. }
  4970. _root->first_attribute = 0;
  4971. return true;
  4972. }
  4973. PUGI_IMPL_FN bool xml_node::remove_child(const char_t* name_)
  4974. {
  4975. return remove_child(child(name_));
  4976. }
  4977. PUGI_IMPL_FN bool xml_node::remove_child(const xml_node& n)
  4978. {
  4979. if (!_root || !n._root || n._root->parent != _root) return false;
  4980. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4981. if (!alloc.reserve()) return false;
  4982. impl::remove_node(n._root);
  4983. impl::destroy_node(n._root, alloc);
  4984. return true;
  4985. }
  4986. PUGI_IMPL_FN bool xml_node::remove_children()
  4987. {
  4988. if (!_root) return false;
  4989. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4990. if (!alloc.reserve()) return false;
  4991. for (xml_node_struct* cur = _root->first_child; cur; )
  4992. {
  4993. xml_node_struct* next = cur->next_sibling;
  4994. impl::destroy_node(cur, alloc);
  4995. cur = next;
  4996. }
  4997. _root->first_child = 0;
  4998. return true;
  4999. }
  5000. PUGI_IMPL_FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5001. {
  5002. // append_buffer is only valid for elements/documents
  5003. if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
  5004. // append buffer can not merge PCDATA into existing PCDATA nodes
  5005. if ((options & parse_merge_pcdata) != 0 && last_child().type() == node_pcdata) return impl::make_parse_result(status_append_invalid_root);
  5006. // get document node
  5007. impl::xml_document_struct* doc = &impl::get_document(_root);
  5008. // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
  5009. doc->header |= impl::xml_memory_page_contents_shared_mask;
  5010. // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
  5011. impl::xml_memory_page* page = 0;
  5012. impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
  5013. (void)page;
  5014. if (!extra) return impl::make_parse_result(status_out_of_memory);
  5015. #ifdef PUGIXML_COMPACT
  5016. // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
  5017. // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
  5018. extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
  5019. #endif
  5020. // add extra buffer to the list
  5021. extra->buffer = 0;
  5022. extra->next = doc->extra_buffers;
  5023. doc->extra_buffers = extra;
  5024. // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
  5025. impl::name_null_sentry sentry(_root);
  5026. return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
  5027. }
  5028. PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
  5029. {
  5030. if (!_root) return xml_node();
  5031. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5032. {
  5033. const char_t* iname = i->name;
  5034. if (iname && impl::strequal(name_, iname))
  5035. {
  5036. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  5037. {
  5038. const char_t* aname = a->name;
  5039. if (aname && impl::strequal(attr_name, aname))
  5040. {
  5041. const char_t* avalue = a->value;
  5042. if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
  5043. return xml_node(i);
  5044. }
  5045. }
  5046. }
  5047. }
  5048. return xml_node();
  5049. }
  5050. PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
  5051. {
  5052. if (!_root) return xml_node();
  5053. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5054. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  5055. {
  5056. const char_t* aname = a->name;
  5057. if (aname && impl::strequal(attr_name, aname))
  5058. {
  5059. const char_t* avalue = a->value;
  5060. if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
  5061. return xml_node(i);
  5062. }
  5063. }
  5064. return xml_node();
  5065. }
  5066. #ifndef PUGIXML_NO_STL
  5067. PUGI_IMPL_FN string_t xml_node::path(char_t delimiter) const
  5068. {
  5069. if (!_root) return string_t();
  5070. size_t offset = 0;
  5071. for (xml_node_struct* i = _root; i; i = i->parent)
  5072. {
  5073. const char_t* iname = i->name;
  5074. offset += (i != _root);
  5075. offset += iname ? impl::strlength(iname) : 0;
  5076. }
  5077. string_t result;
  5078. result.resize(offset);
  5079. for (xml_node_struct* j = _root; j; j = j->parent)
  5080. {
  5081. if (j != _root)
  5082. result[--offset] = delimiter;
  5083. const char_t* jname = j->name;
  5084. if (jname)
  5085. {
  5086. size_t length = impl::strlength(jname);
  5087. offset -= length;
  5088. memcpy(&result[offset], jname, length * sizeof(char_t));
  5089. }
  5090. }
  5091. assert(offset == 0);
  5092. return result;
  5093. }
  5094. #endif
  5095. PUGI_IMPL_FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
  5096. {
  5097. xml_node context = path_[0] == delimiter ? root() : *this;
  5098. if (!context._root) return xml_node();
  5099. const char_t* path_segment = path_;
  5100. while (*path_segment == delimiter) ++path_segment;
  5101. const char_t* path_segment_end = path_segment;
  5102. while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
  5103. if (path_segment == path_segment_end) return context;
  5104. const char_t* next_segment = path_segment_end;
  5105. while (*next_segment == delimiter) ++next_segment;
  5106. if (*path_segment == '.' && path_segment + 1 == path_segment_end)
  5107. return context.first_element_by_path(next_segment, delimiter);
  5108. else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
  5109. return context.parent().first_element_by_path(next_segment, delimiter);
  5110. else
  5111. {
  5112. for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
  5113. {
  5114. const char_t* jname = j->name;
  5115. if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
  5116. {
  5117. xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
  5118. if (subsearch) return subsearch;
  5119. }
  5120. }
  5121. return xml_node();
  5122. }
  5123. }
  5124. PUGI_IMPL_FN bool xml_node::traverse(xml_tree_walker& walker)
  5125. {
  5126. walker._depth = -1;
  5127. xml_node arg_begin(_root);
  5128. if (!walker.begin(arg_begin)) return false;
  5129. xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
  5130. if (cur)
  5131. {
  5132. ++walker._depth;
  5133. do
  5134. {
  5135. xml_node arg_for_each(cur);
  5136. if (!walker.for_each(arg_for_each))
  5137. return false;
  5138. if (cur->first_child)
  5139. {
  5140. ++walker._depth;
  5141. cur = cur->first_child;
  5142. }
  5143. else if (cur->next_sibling)
  5144. cur = cur->next_sibling;
  5145. else
  5146. {
  5147. while (!cur->next_sibling && cur != _root && cur->parent)
  5148. {
  5149. --walker._depth;
  5150. cur = cur->parent;
  5151. }
  5152. if (cur != _root)
  5153. cur = cur->next_sibling;
  5154. }
  5155. }
  5156. while (cur && cur != _root);
  5157. }
  5158. assert(walker._depth == -1);
  5159. xml_node arg_end(_root);
  5160. return walker.end(arg_end);
  5161. }
  5162. PUGI_IMPL_FN size_t xml_node::hash_value() const
  5163. {
  5164. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
  5165. }
  5166. PUGI_IMPL_FN xml_node_struct* xml_node::internal_object() const
  5167. {
  5168. return _root;
  5169. }
  5170. PUGI_IMPL_FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5171. {
  5172. if (!_root) return;
  5173. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5174. impl::node_output(buffered_writer, _root, indent, flags, depth);
  5175. buffered_writer.flush();
  5176. }
  5177. #ifndef PUGIXML_NO_STL
  5178. PUGI_IMPL_FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5179. {
  5180. xml_writer_stream writer(stream);
  5181. print(writer, indent, flags, encoding, depth);
  5182. }
  5183. PUGI_IMPL_FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
  5184. {
  5185. xml_writer_stream writer(stream);
  5186. print(writer, indent, flags, encoding_wchar, depth);
  5187. }
  5188. #endif
  5189. PUGI_IMPL_FN ptrdiff_t xml_node::offset_debug() const
  5190. {
  5191. if (!_root) return -1;
  5192. impl::xml_document_struct& doc = impl::get_document(_root);
  5193. // we can determine the offset reliably only if there is exactly once parse buffer
  5194. if (!doc.buffer || doc.extra_buffers) return -1;
  5195. switch (type())
  5196. {
  5197. case node_document:
  5198. return 0;
  5199. case node_element:
  5200. case node_declaration:
  5201. case node_pi:
  5202. return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
  5203. case node_pcdata:
  5204. case node_cdata:
  5205. case node_comment:
  5206. case node_doctype:
  5207. return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
  5208. default:
  5209. assert(false && "Invalid node type"); // unreachable
  5210. return -1;
  5211. }
  5212. }
  5213. #ifdef __BORLANDC__
  5214. PUGI_IMPL_FN bool operator&&(const xml_node& lhs, bool rhs)
  5215. {
  5216. return (bool)lhs && rhs;
  5217. }
  5218. PUGI_IMPL_FN bool operator||(const xml_node& lhs, bool rhs)
  5219. {
  5220. return (bool)lhs || rhs;
  5221. }
  5222. #endif
  5223. PUGI_IMPL_FN xml_text::xml_text(xml_node_struct* root): _root(root)
  5224. {
  5225. }
  5226. PUGI_IMPL_FN xml_node_struct* xml_text::_data() const
  5227. {
  5228. if (!_root || impl::is_text_node(_root)) return _root;
  5229. // element nodes can have value if parse_embed_pcdata was used
  5230. if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value)
  5231. return _root;
  5232. for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
  5233. if (impl::is_text_node(node))
  5234. return node;
  5235. return 0;
  5236. }
  5237. PUGI_IMPL_FN xml_node_struct* xml_text::_data_new()
  5238. {
  5239. xml_node_struct* d = _data();
  5240. if (d) return d;
  5241. return xml_node(_root).append_child(node_pcdata).internal_object();
  5242. }
  5243. PUGI_IMPL_FN xml_text::xml_text(): _root(0)
  5244. {
  5245. }
  5246. PUGI_IMPL_FN static void unspecified_bool_xml_text(xml_text***)
  5247. {
  5248. }
  5249. PUGI_IMPL_FN xml_text::operator xml_text::unspecified_bool_type() const
  5250. {
  5251. return _data() ? unspecified_bool_xml_text : 0;
  5252. }
  5253. PUGI_IMPL_FN bool xml_text::operator!() const
  5254. {
  5255. return !_data();
  5256. }
  5257. PUGI_IMPL_FN bool xml_text::empty() const
  5258. {
  5259. return _data() == 0;
  5260. }
  5261. PUGI_IMPL_FN const char_t* xml_text::get() const
  5262. {
  5263. xml_node_struct* d = _data();
  5264. if (!d) return PUGIXML_TEXT("");
  5265. const char_t* value = d->value;
  5266. return value ? value : PUGIXML_TEXT("");
  5267. }
  5268. PUGI_IMPL_FN const char_t* xml_text::as_string(const char_t* def) const
  5269. {
  5270. xml_node_struct* d = _data();
  5271. if (!d) return def;
  5272. const char_t* value = d->value;
  5273. return value ? value : def;
  5274. }
  5275. PUGI_IMPL_FN int xml_text::as_int(int def) const
  5276. {
  5277. xml_node_struct* d = _data();
  5278. if (!d) return def;
  5279. const char_t* value = d->value;
  5280. return value ? impl::get_value_int(value) : def;
  5281. }
  5282. PUGI_IMPL_FN unsigned int xml_text::as_uint(unsigned int def) const
  5283. {
  5284. xml_node_struct* d = _data();
  5285. if (!d) return def;
  5286. const char_t* value = d->value;
  5287. return value ? impl::get_value_uint(value) : def;
  5288. }
  5289. PUGI_IMPL_FN double xml_text::as_double(double def) const
  5290. {
  5291. xml_node_struct* d = _data();
  5292. if (!d) return def;
  5293. const char_t* value = d->value;
  5294. return value ? impl::get_value_double(value) : def;
  5295. }
  5296. PUGI_IMPL_FN float xml_text::as_float(float def) const
  5297. {
  5298. xml_node_struct* d = _data();
  5299. if (!d) return def;
  5300. const char_t* value = d->value;
  5301. return value ? impl::get_value_float(value) : def;
  5302. }
  5303. PUGI_IMPL_FN bool xml_text::as_bool(bool def) const
  5304. {
  5305. xml_node_struct* d = _data();
  5306. if (!d) return def;
  5307. const char_t* value = d->value;
  5308. return value ? impl::get_value_bool(value) : def;
  5309. }
  5310. #ifdef PUGIXML_HAS_LONG_LONG
  5311. PUGI_IMPL_FN long long xml_text::as_llong(long long def) const
  5312. {
  5313. xml_node_struct* d = _data();
  5314. if (!d) return def;
  5315. const char_t* value = d->value;
  5316. return value ? impl::get_value_llong(value) : def;
  5317. }
  5318. PUGI_IMPL_FN unsigned long long xml_text::as_ullong(unsigned long long def) const
  5319. {
  5320. xml_node_struct* d = _data();
  5321. if (!d) return def;
  5322. const char_t* value = d->value;
  5323. return value ? impl::get_value_ullong(value) : def;
  5324. }
  5325. #endif
  5326. PUGI_IMPL_FN bool xml_text::set(const char_t* rhs)
  5327. {
  5328. xml_node_struct* dn = _data_new();
  5329. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
  5330. }
  5331. PUGI_IMPL_FN bool xml_text::set(const char_t* rhs, size_t size)
  5332. {
  5333. xml_node_struct* dn = _data_new();
  5334. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, size) : false;
  5335. }
  5336. PUGI_IMPL_FN bool xml_text::set(int rhs)
  5337. {
  5338. xml_node_struct* dn = _data_new();
  5339. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5340. }
  5341. PUGI_IMPL_FN bool xml_text::set(unsigned int rhs)
  5342. {
  5343. xml_node_struct* dn = _data_new();
  5344. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5345. }
  5346. PUGI_IMPL_FN bool xml_text::set(long rhs)
  5347. {
  5348. xml_node_struct* dn = _data_new();
  5349. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5350. }
  5351. PUGI_IMPL_FN bool xml_text::set(unsigned long rhs)
  5352. {
  5353. xml_node_struct* dn = _data_new();
  5354. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5355. }
  5356. PUGI_IMPL_FN bool xml_text::set(float rhs)
  5357. {
  5358. xml_node_struct* dn = _data_new();
  5359. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
  5360. }
  5361. PUGI_IMPL_FN bool xml_text::set(float rhs, int precision)
  5362. {
  5363. xml_node_struct* dn = _data_new();
  5364. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5365. }
  5366. PUGI_IMPL_FN bool xml_text::set(double rhs)
  5367. {
  5368. xml_node_struct* dn = _data_new();
  5369. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
  5370. }
  5371. PUGI_IMPL_FN bool xml_text::set(double rhs, int precision)
  5372. {
  5373. xml_node_struct* dn = _data_new();
  5374. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5375. }
  5376. PUGI_IMPL_FN bool xml_text::set(bool rhs)
  5377. {
  5378. xml_node_struct* dn = _data_new();
  5379. return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
  5380. }
  5381. #ifdef PUGIXML_HAS_LONG_LONG
  5382. PUGI_IMPL_FN bool xml_text::set(long long rhs)
  5383. {
  5384. xml_node_struct* dn = _data_new();
  5385. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5386. }
  5387. PUGI_IMPL_FN bool xml_text::set(unsigned long long rhs)
  5388. {
  5389. xml_node_struct* dn = _data_new();
  5390. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5391. }
  5392. #endif
  5393. PUGI_IMPL_FN xml_text& xml_text::operator=(const char_t* rhs)
  5394. {
  5395. set(rhs);
  5396. return *this;
  5397. }
  5398. PUGI_IMPL_FN xml_text& xml_text::operator=(int rhs)
  5399. {
  5400. set(rhs);
  5401. return *this;
  5402. }
  5403. PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned int rhs)
  5404. {
  5405. set(rhs);
  5406. return *this;
  5407. }
  5408. PUGI_IMPL_FN xml_text& xml_text::operator=(long rhs)
  5409. {
  5410. set(rhs);
  5411. return *this;
  5412. }
  5413. PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long rhs)
  5414. {
  5415. set(rhs);
  5416. return *this;
  5417. }
  5418. PUGI_IMPL_FN xml_text& xml_text::operator=(double rhs)
  5419. {
  5420. set(rhs);
  5421. return *this;
  5422. }
  5423. PUGI_IMPL_FN xml_text& xml_text::operator=(float rhs)
  5424. {
  5425. set(rhs);
  5426. return *this;
  5427. }
  5428. PUGI_IMPL_FN xml_text& xml_text::operator=(bool rhs)
  5429. {
  5430. set(rhs);
  5431. return *this;
  5432. }
  5433. #ifdef PUGIXML_HAS_LONG_LONG
  5434. PUGI_IMPL_FN xml_text& xml_text::operator=(long long rhs)
  5435. {
  5436. set(rhs);
  5437. return *this;
  5438. }
  5439. PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long long rhs)
  5440. {
  5441. set(rhs);
  5442. return *this;
  5443. }
  5444. #endif
  5445. PUGI_IMPL_FN xml_node xml_text::data() const
  5446. {
  5447. return xml_node(_data());
  5448. }
  5449. #ifdef __BORLANDC__
  5450. PUGI_IMPL_FN bool operator&&(const xml_text& lhs, bool rhs)
  5451. {
  5452. return (bool)lhs && rhs;
  5453. }
  5454. PUGI_IMPL_FN bool operator||(const xml_text& lhs, bool rhs)
  5455. {
  5456. return (bool)lhs || rhs;
  5457. }
  5458. #endif
  5459. PUGI_IMPL_FN xml_node_iterator::xml_node_iterator()
  5460. {
  5461. }
  5462. PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
  5463. {
  5464. }
  5465. PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5466. {
  5467. }
  5468. PUGI_IMPL_FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
  5469. {
  5470. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5471. }
  5472. PUGI_IMPL_FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
  5473. {
  5474. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5475. }
  5476. PUGI_IMPL_FN xml_node& xml_node_iterator::operator*() const
  5477. {
  5478. assert(_wrap._root);
  5479. return _wrap;
  5480. }
  5481. PUGI_IMPL_FN xml_node* xml_node_iterator::operator->() const
  5482. {
  5483. assert(_wrap._root);
  5484. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5485. }
  5486. PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator++()
  5487. {
  5488. assert(_wrap._root);
  5489. _wrap._root = _wrap._root->next_sibling;
  5490. return *this;
  5491. }
  5492. PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator++(int)
  5493. {
  5494. xml_node_iterator temp = *this;
  5495. ++*this;
  5496. return temp;
  5497. }
  5498. PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator--()
  5499. {
  5500. _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
  5501. return *this;
  5502. }
  5503. PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator--(int)
  5504. {
  5505. xml_node_iterator temp = *this;
  5506. --*this;
  5507. return temp;
  5508. }
  5509. PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator()
  5510. {
  5511. }
  5512. PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
  5513. {
  5514. }
  5515. PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5516. {
  5517. }
  5518. PUGI_IMPL_FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
  5519. {
  5520. return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
  5521. }
  5522. PUGI_IMPL_FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
  5523. {
  5524. return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
  5525. }
  5526. PUGI_IMPL_FN xml_attribute& xml_attribute_iterator::operator*() const
  5527. {
  5528. assert(_wrap._attr);
  5529. return _wrap;
  5530. }
  5531. PUGI_IMPL_FN xml_attribute* xml_attribute_iterator::operator->() const
  5532. {
  5533. assert(_wrap._attr);
  5534. return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
  5535. }
  5536. PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator++()
  5537. {
  5538. assert(_wrap._attr);
  5539. _wrap._attr = _wrap._attr->next_attribute;
  5540. return *this;
  5541. }
  5542. PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
  5543. {
  5544. xml_attribute_iterator temp = *this;
  5545. ++*this;
  5546. return temp;
  5547. }
  5548. PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator--()
  5549. {
  5550. _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
  5551. return *this;
  5552. }
  5553. PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
  5554. {
  5555. xml_attribute_iterator temp = *this;
  5556. --*this;
  5557. return temp;
  5558. }
  5559. PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
  5560. {
  5561. }
  5562. PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
  5563. {
  5564. }
  5565. PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
  5566. {
  5567. }
  5568. PUGI_IMPL_FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
  5569. {
  5570. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5571. }
  5572. PUGI_IMPL_FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
  5573. {
  5574. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5575. }
  5576. PUGI_IMPL_FN xml_node& xml_named_node_iterator::operator*() const
  5577. {
  5578. assert(_wrap._root);
  5579. return _wrap;
  5580. }
  5581. PUGI_IMPL_FN xml_node* xml_named_node_iterator::operator->() const
  5582. {
  5583. assert(_wrap._root);
  5584. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5585. }
  5586. PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator++()
  5587. {
  5588. assert(_wrap._root);
  5589. _wrap = _wrap.next_sibling(_name);
  5590. return *this;
  5591. }
  5592. PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
  5593. {
  5594. xml_named_node_iterator temp = *this;
  5595. ++*this;
  5596. return temp;
  5597. }
  5598. PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator--()
  5599. {
  5600. if (_wrap._root)
  5601. _wrap = _wrap.previous_sibling(_name);
  5602. else
  5603. {
  5604. _wrap = _parent.last_child();
  5605. if (!impl::strequal(_wrap.name(), _name))
  5606. _wrap = _wrap.previous_sibling(_name);
  5607. }
  5608. return *this;
  5609. }
  5610. PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
  5611. {
  5612. xml_named_node_iterator temp = *this;
  5613. --*this;
  5614. return temp;
  5615. }
  5616. PUGI_IMPL_FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
  5617. {
  5618. }
  5619. PUGI_IMPL_FN xml_parse_result::operator bool() const
  5620. {
  5621. return status == status_ok;
  5622. }
  5623. PUGI_IMPL_FN const char* xml_parse_result::description() const
  5624. {
  5625. switch (status)
  5626. {
  5627. case status_ok: return "No error";
  5628. case status_file_not_found: return "File was not found";
  5629. case status_io_error: return "Error reading from file/stream";
  5630. case status_out_of_memory: return "Could not allocate memory";
  5631. case status_internal_error: return "Internal error occurred";
  5632. case status_unrecognized_tag: return "Could not determine tag type";
  5633. case status_bad_pi: return "Error parsing document declaration/processing instruction";
  5634. case status_bad_comment: return "Error parsing comment";
  5635. case status_bad_cdata: return "Error parsing CDATA section";
  5636. case status_bad_doctype: return "Error parsing document type declaration";
  5637. case status_bad_pcdata: return "Error parsing PCDATA section";
  5638. case status_bad_start_element: return "Error parsing start element tag";
  5639. case status_bad_attribute: return "Error parsing element attribute";
  5640. case status_bad_end_element: return "Error parsing end element tag";
  5641. case status_end_element_mismatch: return "Start-end tags mismatch";
  5642. case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
  5643. case status_no_document_element: return "No document element found";
  5644. default: return "Unknown error";
  5645. }
  5646. }
  5647. PUGI_IMPL_FN xml_document::xml_document(): _buffer(0)
  5648. {
  5649. _create();
  5650. }
  5651. PUGI_IMPL_FN xml_document::~xml_document()
  5652. {
  5653. _destroy();
  5654. }
  5655. #ifdef PUGIXML_HAS_MOVE
  5656. PUGI_IMPL_FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
  5657. {
  5658. _create();
  5659. _move(rhs);
  5660. }
  5661. PUGI_IMPL_FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5662. {
  5663. if (this == &rhs) return *this;
  5664. _destroy();
  5665. _create();
  5666. _move(rhs);
  5667. return *this;
  5668. }
  5669. #endif
  5670. PUGI_IMPL_FN void xml_document::reset()
  5671. {
  5672. _destroy();
  5673. _create();
  5674. }
  5675. PUGI_IMPL_FN void xml_document::reset(const xml_document& proto)
  5676. {
  5677. reset();
  5678. impl::node_copy_tree(_root, proto._root);
  5679. }
  5680. PUGI_IMPL_FN void xml_document::_create()
  5681. {
  5682. assert(!_root);
  5683. #ifdef PUGIXML_COMPACT
  5684. // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
  5685. const size_t page_offset = sizeof(void*);
  5686. #else
  5687. const size_t page_offset = 0;
  5688. #endif
  5689. // initialize sentinel page
  5690. PUGI_IMPL_STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
  5691. // prepare page structure
  5692. impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
  5693. assert(page);
  5694. page->busy_size = impl::xml_memory_page_size;
  5695. // setup first page marker
  5696. #ifdef PUGIXML_COMPACT
  5697. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  5698. page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
  5699. *page->compact_page_marker = sizeof(impl::xml_memory_page);
  5700. #endif
  5701. // allocate new root
  5702. _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
  5703. _root->prev_sibling_c = _root;
  5704. // setup sentinel page
  5705. page->allocator = static_cast<impl::xml_document_struct*>(_root);
  5706. // setup hash table pointer in allocator
  5707. #ifdef PUGIXML_COMPACT
  5708. page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
  5709. #endif
  5710. // verify the document allocation
  5711. assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
  5712. }
  5713. PUGI_IMPL_FN void xml_document::_destroy()
  5714. {
  5715. assert(_root);
  5716. // destroy static storage
  5717. if (_buffer)
  5718. {
  5719. impl::xml_memory::deallocate(_buffer);
  5720. _buffer = 0;
  5721. }
  5722. // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
  5723. for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
  5724. {
  5725. if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
  5726. }
  5727. // destroy dynamic storage, leave sentinel page (it's in static memory)
  5728. impl::xml_memory_page* root_page = PUGI_IMPL_GETPAGE(_root);
  5729. assert(root_page && !root_page->prev);
  5730. assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
  5731. for (impl::xml_memory_page* page = root_page->next; page; )
  5732. {
  5733. impl::xml_memory_page* next = page->next;
  5734. impl::xml_allocator::deallocate_page(page);
  5735. page = next;
  5736. }
  5737. #ifdef PUGIXML_COMPACT
  5738. // destroy hash table
  5739. static_cast<impl::xml_document_struct*>(_root)->hash.clear();
  5740. #endif
  5741. _root = 0;
  5742. }
  5743. #ifdef PUGIXML_HAS_MOVE
  5744. PUGI_IMPL_FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5745. {
  5746. impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
  5747. impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
  5748. // save first child pointer for later; this needs hash access
  5749. xml_node_struct* other_first_child = other->first_child;
  5750. #ifdef PUGIXML_COMPACT
  5751. // reserve space for the hash table up front; this is the only operation that can fail
  5752. // if it does, we have no choice but to throw (if we have exceptions)
  5753. if (other_first_child)
  5754. {
  5755. size_t other_children = 0;
  5756. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5757. other_children++;
  5758. // in compact mode, each pointer assignment could result in a hash table request
  5759. // during move, we have to relocate document first_child and parents of all children
  5760. // normally there's just one child and its parent has a pointerless encoding but
  5761. // we assume the worst here
  5762. if (!other->_hash->reserve(other_children + 1))
  5763. {
  5764. #ifdef PUGIXML_NO_EXCEPTIONS
  5765. return;
  5766. #else
  5767. throw std::bad_alloc();
  5768. #endif
  5769. }
  5770. }
  5771. #endif
  5772. // move allocation state
  5773. // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
  5774. if (other->_root != PUGI_IMPL_GETPAGE(other))
  5775. {
  5776. doc->_root = other->_root;
  5777. doc->_busy_size = other->_busy_size;
  5778. }
  5779. // move buffer state
  5780. doc->buffer = other->buffer;
  5781. doc->extra_buffers = other->extra_buffers;
  5782. _buffer = rhs._buffer;
  5783. #ifdef PUGIXML_COMPACT
  5784. // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
  5785. doc->hash = other->hash;
  5786. doc->_hash = &doc->hash;
  5787. // make sure we don't access other hash up until the end when we reinitialize other document
  5788. other->_hash = 0;
  5789. #endif
  5790. // move page structure
  5791. impl::xml_memory_page* doc_page = PUGI_IMPL_GETPAGE(doc);
  5792. assert(doc_page && !doc_page->prev && !doc_page->next);
  5793. impl::xml_memory_page* other_page = PUGI_IMPL_GETPAGE(other);
  5794. assert(other_page && !other_page->prev);
  5795. // relink pages since root page is embedded into xml_document
  5796. if (impl::xml_memory_page* page = other_page->next)
  5797. {
  5798. assert(page->prev == other_page);
  5799. page->prev = doc_page;
  5800. doc_page->next = page;
  5801. other_page->next = 0;
  5802. }
  5803. // make sure pages point to the correct document state
  5804. for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
  5805. {
  5806. assert(page->allocator == other);
  5807. page->allocator = doc;
  5808. #ifdef PUGIXML_COMPACT
  5809. // this automatically migrates most children between documents and prevents ->parent assignment from allocating
  5810. if (page->compact_shared_parent == other)
  5811. page->compact_shared_parent = doc;
  5812. #endif
  5813. }
  5814. // move tree structure
  5815. assert(!doc->first_child);
  5816. doc->first_child = other_first_child;
  5817. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5818. {
  5819. #ifdef PUGIXML_COMPACT
  5820. // most children will have migrated when we reassigned compact_shared_parent
  5821. assert(node->parent == other || node->parent == doc);
  5822. node->parent = doc;
  5823. #else
  5824. assert(node->parent == other);
  5825. node->parent = doc;
  5826. #endif
  5827. }
  5828. // reset other document
  5829. new (other) impl::xml_document_struct(PUGI_IMPL_GETPAGE(other));
  5830. rhs._buffer = 0;
  5831. }
  5832. #endif
  5833. #ifndef PUGIXML_NO_STL
  5834. PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
  5835. {
  5836. reset();
  5837. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
  5838. }
  5839. PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
  5840. {
  5841. reset();
  5842. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
  5843. }
  5844. #endif
  5845. PUGI_IMPL_FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
  5846. {
  5847. // Force native encoding (skip autodetection)
  5848. #ifdef PUGIXML_WCHAR_MODE
  5849. xml_encoding encoding = encoding_wchar;
  5850. #else
  5851. xml_encoding encoding = encoding_utf8;
  5852. #endif
  5853. return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
  5854. }
  5855. PUGI_IMPL_FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
  5856. {
  5857. return load_string(contents, options);
  5858. }
  5859. PUGI_IMPL_FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
  5860. {
  5861. reset();
  5862. using impl::auto_deleter; // MSVC7 workaround
  5863. auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
  5864. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5865. }
  5866. PUGI_IMPL_FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
  5867. {
  5868. reset();
  5869. using impl::auto_deleter; // MSVC7 workaround
  5870. auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
  5871. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5872. }
  5873. PUGI_IMPL_FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5874. {
  5875. reset();
  5876. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
  5877. }
  5878. PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5879. {
  5880. reset();
  5881. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
  5882. }
  5883. PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5884. {
  5885. reset();
  5886. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
  5887. }
  5888. PUGI_IMPL_FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5889. {
  5890. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5891. if ((flags & format_write_bom) && encoding != encoding_latin1)
  5892. {
  5893. // BOM always represents the codepoint U+FEFF, so just write it in native encoding
  5894. #ifdef PUGIXML_WCHAR_MODE
  5895. unsigned int bom = 0xfeff;
  5896. buffered_writer.write(static_cast<wchar_t>(bom));
  5897. #else
  5898. buffered_writer.write('\xef', '\xbb', '\xbf');
  5899. #endif
  5900. }
  5901. if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
  5902. {
  5903. buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
  5904. if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
  5905. buffered_writer.write('?', '>');
  5906. if (!(flags & format_raw)) buffered_writer.write('\n');
  5907. }
  5908. impl::node_output(buffered_writer, _root, indent, flags, 0);
  5909. buffered_writer.flush();
  5910. }
  5911. #ifndef PUGIXML_NO_STL
  5912. PUGI_IMPL_FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5913. {
  5914. xml_writer_stream writer(stream);
  5915. save(writer, indent, flags, encoding);
  5916. }
  5917. PUGI_IMPL_FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
  5918. {
  5919. xml_writer_stream writer(stream);
  5920. save(writer, indent, flags, encoding_wchar);
  5921. }
  5922. #endif
  5923. PUGI_IMPL_FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5924. {
  5925. using impl::auto_deleter; // MSVC7 workaround
  5926. auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
  5927. return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
  5928. }
  5929. PUGI_IMPL_FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5930. {
  5931. using impl::auto_deleter; // MSVC7 workaround
  5932. auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
  5933. return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
  5934. }
  5935. PUGI_IMPL_FN xml_node xml_document::document_element() const
  5936. {
  5937. assert(_root);
  5938. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5939. if (PUGI_IMPL_NODETYPE(i) == node_element)
  5940. return xml_node(i);
  5941. return xml_node();
  5942. }
  5943. #ifndef PUGIXML_NO_STL
  5944. PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
  5945. {
  5946. assert(str);
  5947. return impl::as_utf8_impl(str, impl::strlength_wide(str));
  5948. }
  5949. PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
  5950. {
  5951. return impl::as_utf8_impl(str.c_str(), str.size());
  5952. }
  5953. PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
  5954. {
  5955. assert(str);
  5956. return impl::as_wide_impl(str, strlen(str));
  5957. }
  5958. PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
  5959. {
  5960. return impl::as_wide_impl(str.c_str(), str.size());
  5961. }
  5962. #endif
  5963. PUGI_IMPL_FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
  5964. {
  5965. impl::xml_memory::allocate = allocate;
  5966. impl::xml_memory::deallocate = deallocate;
  5967. }
  5968. PUGI_IMPL_FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
  5969. {
  5970. return impl::xml_memory::allocate;
  5971. }
  5972. PUGI_IMPL_FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
  5973. {
  5974. return impl::xml_memory::deallocate;
  5975. }
  5976. }
  5977. #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
  5978. namespace std
  5979. {
  5980. // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
  5981. PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
  5982. {
  5983. return std::bidirectional_iterator_tag();
  5984. }
  5985. PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
  5986. {
  5987. return std::bidirectional_iterator_tag();
  5988. }
  5989. PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
  5990. {
  5991. return std::bidirectional_iterator_tag();
  5992. }
  5993. }
  5994. #endif
  5995. #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
  5996. namespace std
  5997. {
  5998. // Workarounds for (non-standard) iterator category detection
  5999. PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
  6000. {
  6001. return std::bidirectional_iterator_tag();
  6002. }
  6003. PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
  6004. {
  6005. return std::bidirectional_iterator_tag();
  6006. }
  6007. PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
  6008. {
  6009. return std::bidirectional_iterator_tag();
  6010. }
  6011. }
  6012. #endif
  6013. #ifndef PUGIXML_NO_XPATH
  6014. // STL replacements
  6015. PUGI_IMPL_NS_BEGIN
  6016. struct equal_to
  6017. {
  6018. template <typename T> bool operator()(const T& lhs, const T& rhs) const
  6019. {
  6020. return lhs == rhs;
  6021. }
  6022. };
  6023. struct not_equal_to
  6024. {
  6025. template <typename T> bool operator()(const T& lhs, const T& rhs) const
  6026. {
  6027. return lhs != rhs;
  6028. }
  6029. };
  6030. struct less
  6031. {
  6032. template <typename T> bool operator()(const T& lhs, const T& rhs) const
  6033. {
  6034. return lhs < rhs;
  6035. }
  6036. };
  6037. struct less_equal
  6038. {
  6039. template <typename T> bool operator()(const T& lhs, const T& rhs) const
  6040. {
  6041. return lhs <= rhs;
  6042. }
  6043. };
  6044. template <typename T> inline void swap(T& lhs, T& rhs)
  6045. {
  6046. T temp = lhs;
  6047. lhs = rhs;
  6048. rhs = temp;
  6049. }
  6050. template <typename I, typename Pred> PUGI_IMPL_FN I min_element(I begin, I end, const Pred& pred)
  6051. {
  6052. I result = begin;
  6053. for (I it = begin + 1; it != end; ++it)
  6054. if (pred(*it, *result))
  6055. result = it;
  6056. return result;
  6057. }
  6058. template <typename I> PUGI_IMPL_FN void reverse(I begin, I end)
  6059. {
  6060. while (end - begin > 1)
  6061. swap(*begin++, *--end);
  6062. }
  6063. template <typename I> PUGI_IMPL_FN I unique(I begin, I end)
  6064. {
  6065. // fast skip head
  6066. while (end - begin > 1 && *begin != *(begin + 1))
  6067. begin++;
  6068. if (begin == end)
  6069. return begin;
  6070. // last written element
  6071. I write = begin++;
  6072. // merge unique elements
  6073. while (begin != end)
  6074. {
  6075. if (*begin != *write)
  6076. *++write = *begin++;
  6077. else
  6078. begin++;
  6079. }
  6080. // past-the-end (write points to live element)
  6081. return write + 1;
  6082. }
  6083. template <typename T, typename Pred> PUGI_IMPL_FN void insertion_sort(T* begin, T* end, const Pred& pred)
  6084. {
  6085. if (begin == end)
  6086. return;
  6087. for (T* it = begin + 1; it != end; ++it)
  6088. {
  6089. T val = *it;
  6090. T* hole = it;
  6091. // move hole backwards
  6092. while (hole > begin && pred(val, *(hole - 1)))
  6093. {
  6094. *hole = *(hole - 1);
  6095. hole--;
  6096. }
  6097. // fill hole with element
  6098. *hole = val;
  6099. }
  6100. }
  6101. template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
  6102. {
  6103. if (pred(*middle, *first))
  6104. swap(middle, first);
  6105. if (pred(*last, *middle))
  6106. swap(last, middle);
  6107. if (pred(*middle, *first))
  6108. swap(middle, first);
  6109. return middle;
  6110. }
  6111. template <typename T, typename Pred> PUGI_IMPL_FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
  6112. {
  6113. // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
  6114. T* eq = begin;
  6115. T* lt = begin;
  6116. T* gt = end;
  6117. while (lt < gt)
  6118. {
  6119. if (pred(*lt, pivot))
  6120. lt++;
  6121. else if (*lt == pivot)
  6122. swap(*eq++, *lt++);
  6123. else
  6124. swap(*lt, *--gt);
  6125. }
  6126. // we now have just 4 groups: = < >; move equal elements to the middle
  6127. T* eqbeg = gt;
  6128. for (T* it = begin; it != eq; ++it)
  6129. swap(*it, *--eqbeg);
  6130. *out_eqbeg = eqbeg;
  6131. *out_eqend = gt;
  6132. }
  6133. template <typename I, typename Pred> PUGI_IMPL_FN void sort(I begin, I end, const Pred& pred)
  6134. {
  6135. // sort large chunks
  6136. while (end - begin > 16)
  6137. {
  6138. // find median element
  6139. I middle = begin + (end - begin) / 2;
  6140. I median = median3(begin, middle, end - 1, pred);
  6141. // partition in three chunks (< = >)
  6142. I eqbeg, eqend;
  6143. partition3(begin, end, *median, pred, &eqbeg, &eqend);
  6144. // loop on larger half
  6145. if (eqbeg - begin > end - eqend)
  6146. {
  6147. sort(eqend, end, pred);
  6148. end = eqbeg;
  6149. }
  6150. else
  6151. {
  6152. sort(begin, eqbeg, pred);
  6153. begin = eqend;
  6154. }
  6155. }
  6156. // insertion sort small chunk
  6157. insertion_sort(begin, end, pred);
  6158. }
  6159. PUGI_IMPL_FN bool hash_insert(const void** table, size_t size, const void* key)
  6160. {
  6161. assert(key);
  6162. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
  6163. // MurmurHash3 32-bit finalizer
  6164. h ^= h >> 16;
  6165. h *= 0x85ebca6bu;
  6166. h ^= h >> 13;
  6167. h *= 0xc2b2ae35u;
  6168. h ^= h >> 16;
  6169. size_t hashmod = size - 1;
  6170. size_t bucket = h & hashmod;
  6171. for (size_t probe = 0; probe <= hashmod; ++probe)
  6172. {
  6173. if (table[bucket] == 0)
  6174. {
  6175. table[bucket] = key;
  6176. return true;
  6177. }
  6178. if (table[bucket] == key)
  6179. return false;
  6180. // hash collision, quadratic probing
  6181. bucket = (bucket + probe + 1) & hashmod;
  6182. }
  6183. assert(false && "Hash table is full"); // unreachable
  6184. return false;
  6185. }
  6186. PUGI_IMPL_NS_END
  6187. // Allocator used for AST and evaluation stacks
  6188. PUGI_IMPL_NS_BEGIN
  6189. static const size_t xpath_memory_page_size =
  6190. #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
  6191. PUGIXML_MEMORY_XPATH_PAGE_SIZE
  6192. #else
  6193. 4096
  6194. #endif
  6195. ;
  6196. static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
  6197. struct xpath_memory_block
  6198. {
  6199. xpath_memory_block* next;
  6200. size_t capacity;
  6201. union
  6202. {
  6203. char data[xpath_memory_page_size];
  6204. double alignment;
  6205. };
  6206. };
  6207. struct xpath_allocator
  6208. {
  6209. xpath_memory_block* _root;
  6210. size_t _root_size;
  6211. bool* _error;
  6212. xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
  6213. {
  6214. }
  6215. void* allocate(size_t size)
  6216. {
  6217. // round size up to block alignment boundary
  6218. size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6219. if (_root_size + size <= _root->capacity)
  6220. {
  6221. void* buf = &_root->data[0] + _root_size;
  6222. _root_size += size;
  6223. return buf;
  6224. }
  6225. else
  6226. {
  6227. // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
  6228. size_t block_capacity_base = sizeof(_root->data);
  6229. size_t block_capacity_req = size + block_capacity_base / 4;
  6230. size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
  6231. size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
  6232. xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
  6233. if (!block)
  6234. {
  6235. if (_error) *_error = true;
  6236. return 0;
  6237. }
  6238. block->next = _root;
  6239. block->capacity = block_capacity;
  6240. _root = block;
  6241. _root_size = size;
  6242. return block->data;
  6243. }
  6244. }
  6245. void* reallocate(void* ptr, size_t old_size, size_t new_size)
  6246. {
  6247. // round size up to block alignment boundary
  6248. old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6249. new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
  6250. // we can only reallocate the last object
  6251. assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
  6252. // try to reallocate the object inplace
  6253. if (ptr && _root_size - old_size + new_size <= _root->capacity)
  6254. {
  6255. _root_size = _root_size - old_size + new_size;
  6256. return ptr;
  6257. }
  6258. // allocate a new block
  6259. void* result = allocate(new_size);
  6260. if (!result) return 0;
  6261. // we have a new block
  6262. if (ptr)
  6263. {
  6264. // copy old data (we only support growing)
  6265. assert(new_size >= old_size);
  6266. memcpy(result, ptr, old_size);
  6267. // free the previous page if it had no other objects
  6268. assert(_root->data == result);
  6269. assert(_root->next);
  6270. if (_root->next->data == ptr)
  6271. {
  6272. // deallocate the whole page, unless it was the first one
  6273. xpath_memory_block* next = _root->next->next;
  6274. if (next)
  6275. {
  6276. xml_memory::deallocate(_root->next);
  6277. _root->next = next;
  6278. }
  6279. }
  6280. }
  6281. return result;
  6282. }
  6283. void revert(const xpath_allocator& state)
  6284. {
  6285. // free all new pages
  6286. xpath_memory_block* cur = _root;
  6287. while (cur != state._root)
  6288. {
  6289. xpath_memory_block* next = cur->next;
  6290. xml_memory::deallocate(cur);
  6291. cur = next;
  6292. }
  6293. // restore state
  6294. _root = state._root;
  6295. _root_size = state._root_size;
  6296. }
  6297. void release()
  6298. {
  6299. xpath_memory_block* cur = _root;
  6300. assert(cur);
  6301. while (cur->next)
  6302. {
  6303. xpath_memory_block* next = cur->next;
  6304. xml_memory::deallocate(cur);
  6305. cur = next;
  6306. }
  6307. }
  6308. };
  6309. struct xpath_allocator_capture
  6310. {
  6311. xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
  6312. {
  6313. }
  6314. ~xpath_allocator_capture()
  6315. {
  6316. _target->revert(_state);
  6317. }
  6318. xpath_allocator* _target;
  6319. xpath_allocator _state;
  6320. };
  6321. struct xpath_stack
  6322. {
  6323. xpath_allocator* result;
  6324. xpath_allocator* temp;
  6325. };
  6326. struct xpath_stack_data
  6327. {
  6328. xpath_memory_block blocks[2];
  6329. xpath_allocator result;
  6330. xpath_allocator temp;
  6331. xpath_stack stack;
  6332. bool oom;
  6333. xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
  6334. {
  6335. blocks[0].next = blocks[1].next = 0;
  6336. blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
  6337. stack.result = &result;
  6338. stack.temp = &temp;
  6339. }
  6340. ~xpath_stack_data()
  6341. {
  6342. result.release();
  6343. temp.release();
  6344. }
  6345. };
  6346. PUGI_IMPL_NS_END
  6347. // String class
  6348. PUGI_IMPL_NS_BEGIN
  6349. class xpath_string
  6350. {
  6351. const char_t* _buffer;
  6352. bool _uses_heap;
  6353. size_t _length_heap;
  6354. static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
  6355. {
  6356. char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
  6357. if (!result) return 0;
  6358. memcpy(result, string, length * sizeof(char_t));
  6359. result[length] = 0;
  6360. return result;
  6361. }
  6362. xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
  6363. {
  6364. }
  6365. public:
  6366. static xpath_string from_const(const char_t* str)
  6367. {
  6368. return xpath_string(str, false, 0);
  6369. }
  6370. static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
  6371. {
  6372. assert(begin <= end && *end == 0);
  6373. return xpath_string(begin, true, static_cast<size_t>(end - begin));
  6374. }
  6375. static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
  6376. {
  6377. assert(begin <= end);
  6378. if (begin == end)
  6379. return xpath_string();
  6380. size_t length = static_cast<size_t>(end - begin);
  6381. const char_t* data = duplicate_string(begin, length, alloc);
  6382. return data ? xpath_string(data, true, length) : xpath_string();
  6383. }
  6384. xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
  6385. {
  6386. }
  6387. void append(const xpath_string& o, xpath_allocator* alloc)
  6388. {
  6389. // skip empty sources
  6390. if (!*o._buffer) return;
  6391. // fast append for constant empty target and constant source
  6392. if (!*_buffer && !_uses_heap && !o._uses_heap)
  6393. {
  6394. _buffer = o._buffer;
  6395. }
  6396. else
  6397. {
  6398. // need to make heap copy
  6399. size_t target_length = length();
  6400. size_t source_length = o.length();
  6401. size_t result_length = target_length + source_length;
  6402. // allocate new buffer
  6403. char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
  6404. if (!result) return;
  6405. // append first string to the new buffer in case there was no reallocation
  6406. if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
  6407. // append second string to the new buffer
  6408. memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
  6409. result[result_length] = 0;
  6410. // finalize
  6411. _buffer = result;
  6412. _uses_heap = true;
  6413. _length_heap = result_length;
  6414. }
  6415. }
  6416. const char_t* c_str() const
  6417. {
  6418. return _buffer;
  6419. }
  6420. size_t length() const
  6421. {
  6422. return _uses_heap ? _length_heap : strlength(_buffer);
  6423. }
  6424. char_t* data(xpath_allocator* alloc)
  6425. {
  6426. // make private heap copy
  6427. if (!_uses_heap)
  6428. {
  6429. size_t length_ = strlength(_buffer);
  6430. const char_t* data_ = duplicate_string(_buffer, length_, alloc);
  6431. if (!data_) return 0;
  6432. _buffer = data_;
  6433. _uses_heap = true;
  6434. _length_heap = length_;
  6435. }
  6436. return const_cast<char_t*>(_buffer);
  6437. }
  6438. bool empty() const
  6439. {
  6440. return *_buffer == 0;
  6441. }
  6442. bool operator==(const xpath_string& o) const
  6443. {
  6444. return strequal(_buffer, o._buffer);
  6445. }
  6446. bool operator!=(const xpath_string& o) const
  6447. {
  6448. return !strequal(_buffer, o._buffer);
  6449. }
  6450. bool uses_heap() const
  6451. {
  6452. return _uses_heap;
  6453. }
  6454. };
  6455. PUGI_IMPL_NS_END
  6456. PUGI_IMPL_NS_BEGIN
  6457. PUGI_IMPL_FN bool starts_with(const char_t* string, const char_t* pattern)
  6458. {
  6459. while (*pattern && *string == *pattern)
  6460. {
  6461. string++;
  6462. pattern++;
  6463. }
  6464. return *pattern == 0;
  6465. }
  6466. PUGI_IMPL_FN const char_t* find_char(const char_t* s, char_t c)
  6467. {
  6468. #ifdef PUGIXML_WCHAR_MODE
  6469. return wcschr(s, c);
  6470. #else
  6471. return strchr(s, c);
  6472. #endif
  6473. }
  6474. PUGI_IMPL_FN const char_t* find_substring(const char_t* s, const char_t* p)
  6475. {
  6476. #ifdef PUGIXML_WCHAR_MODE
  6477. // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
  6478. return (*p == 0) ? s : wcsstr(s, p);
  6479. #else
  6480. return strstr(s, p);
  6481. #endif
  6482. }
  6483. // Converts symbol to lower case, if it is an ASCII one
  6484. PUGI_IMPL_FN char_t tolower_ascii(char_t ch)
  6485. {
  6486. return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
  6487. }
  6488. PUGI_IMPL_FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
  6489. {
  6490. if (na.attribute())
  6491. return xpath_string::from_const(na.attribute().value());
  6492. else
  6493. {
  6494. xml_node n = na.node();
  6495. switch (n.type())
  6496. {
  6497. case node_pcdata:
  6498. case node_cdata:
  6499. case node_comment:
  6500. case node_pi:
  6501. return xpath_string::from_const(n.value());
  6502. case node_document:
  6503. case node_element:
  6504. {
  6505. xpath_string result;
  6506. // element nodes can have value if parse_embed_pcdata was used
  6507. if (n.value()[0])
  6508. result.append(xpath_string::from_const(n.value()), alloc);
  6509. xml_node cur = n.first_child();
  6510. while (cur && cur != n)
  6511. {
  6512. if (cur.type() == node_pcdata || cur.type() == node_cdata)
  6513. result.append(xpath_string::from_const(cur.value()), alloc);
  6514. if (cur.first_child())
  6515. cur = cur.first_child();
  6516. else if (cur.next_sibling())
  6517. cur = cur.next_sibling();
  6518. else
  6519. {
  6520. while (!cur.next_sibling() && cur != n)
  6521. cur = cur.parent();
  6522. if (cur != n) cur = cur.next_sibling();
  6523. }
  6524. }
  6525. return result;
  6526. }
  6527. default:
  6528. return xpath_string();
  6529. }
  6530. }
  6531. }
  6532. PUGI_IMPL_FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
  6533. {
  6534. assert(ln->parent == rn->parent);
  6535. // there is no common ancestor (the shared parent is null), nodes are from different documents
  6536. if (!ln->parent) return ln < rn;
  6537. // determine sibling order
  6538. xml_node_struct* ls = ln;
  6539. xml_node_struct* rs = rn;
  6540. while (ls && rs)
  6541. {
  6542. if (ls == rn) return true;
  6543. if (rs == ln) return false;
  6544. ls = ls->next_sibling;
  6545. rs = rs->next_sibling;
  6546. }
  6547. // if rn sibling chain ended ln must be before rn
  6548. return !rs;
  6549. }
  6550. PUGI_IMPL_FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
  6551. {
  6552. // find common ancestor at the same depth, if any
  6553. xml_node_struct* lp = ln;
  6554. xml_node_struct* rp = rn;
  6555. while (lp && rp && lp->parent != rp->parent)
  6556. {
  6557. lp = lp->parent;
  6558. rp = rp->parent;
  6559. }
  6560. // parents are the same!
  6561. if (lp && rp) return node_is_before_sibling(lp, rp);
  6562. // nodes are at different depths, need to normalize heights
  6563. bool left_higher = !lp;
  6564. while (lp)
  6565. {
  6566. lp = lp->parent;
  6567. ln = ln->parent;
  6568. }
  6569. while (rp)
  6570. {
  6571. rp = rp->parent;
  6572. rn = rn->parent;
  6573. }
  6574. // one node is the ancestor of the other
  6575. if (ln == rn) return left_higher;
  6576. // find common ancestor... again
  6577. while (ln->parent != rn->parent)
  6578. {
  6579. ln = ln->parent;
  6580. rn = rn->parent;
  6581. }
  6582. return node_is_before_sibling(ln, rn);
  6583. }
  6584. PUGI_IMPL_FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
  6585. {
  6586. while (node && node != parent) node = node->parent;
  6587. return parent && node == parent;
  6588. }
  6589. PUGI_IMPL_FN const void* document_buffer_order(const xpath_node& xnode)
  6590. {
  6591. xml_node_struct* node = xnode.node().internal_object();
  6592. if (node)
  6593. {
  6594. if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
  6595. {
  6596. if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
  6597. if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
  6598. }
  6599. return 0;
  6600. }
  6601. xml_attribute_struct* attr = xnode.attribute().internal_object();
  6602. if (attr)
  6603. {
  6604. if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
  6605. {
  6606. if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
  6607. if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
  6608. }
  6609. return 0;
  6610. }
  6611. return 0;
  6612. }
  6613. struct document_order_comparator
  6614. {
  6615. bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
  6616. {
  6617. // optimized document order based check
  6618. const void* lo = document_buffer_order(lhs);
  6619. const void* ro = document_buffer_order(rhs);
  6620. if (lo && ro) return lo < ro;
  6621. // slow comparison
  6622. xml_node ln = lhs.node(), rn = rhs.node();
  6623. // compare attributes
  6624. if (lhs.attribute() && rhs.attribute())
  6625. {
  6626. // shared parent
  6627. if (lhs.parent() == rhs.parent())
  6628. {
  6629. // determine sibling order
  6630. for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
  6631. if (a == rhs.attribute())
  6632. return true;
  6633. return false;
  6634. }
  6635. // compare attribute parents
  6636. ln = lhs.parent();
  6637. rn = rhs.parent();
  6638. }
  6639. else if (lhs.attribute())
  6640. {
  6641. // attributes go after the parent element
  6642. if (lhs.parent() == rhs.node()) return false;
  6643. ln = lhs.parent();
  6644. }
  6645. else if (rhs.attribute())
  6646. {
  6647. // attributes go after the parent element
  6648. if (rhs.parent() == lhs.node()) return true;
  6649. rn = rhs.parent();
  6650. }
  6651. if (ln == rn) return false;
  6652. if (!ln || !rn) return ln < rn;
  6653. return node_is_before(ln.internal_object(), rn.internal_object());
  6654. }
  6655. };
  6656. PUGI_IMPL_FN double gen_nan()
  6657. {
  6658. #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
  6659. PUGI_IMPL_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
  6660. typedef uint32_t UI; // BCC5 workaround
  6661. union { float f; UI i; } u;
  6662. u.i = 0x7fc00000;
  6663. return double(u.f);
  6664. #else
  6665. // fallback
  6666. const volatile double zero = 0.0;
  6667. return zero / zero;
  6668. #endif
  6669. }
  6670. PUGI_IMPL_FN bool is_nan(double value)
  6671. {
  6672. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6673. return !!_isnan(value);
  6674. #elif defined(fpclassify) && defined(FP_NAN)
  6675. return fpclassify(value) == FP_NAN;
  6676. #else
  6677. // fallback
  6678. const volatile double v = value;
  6679. return v != v;
  6680. #endif
  6681. }
  6682. PUGI_IMPL_FN const char_t* convert_number_to_string_special(double value)
  6683. {
  6684. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6685. if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
  6686. if (_isnan(value)) return PUGIXML_TEXT("NaN");
  6687. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6688. #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
  6689. switch (fpclassify(value))
  6690. {
  6691. case FP_NAN:
  6692. return PUGIXML_TEXT("NaN");
  6693. case FP_INFINITE:
  6694. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6695. case FP_ZERO:
  6696. return PUGIXML_TEXT("0");
  6697. default:
  6698. return 0;
  6699. }
  6700. #else
  6701. // fallback
  6702. const volatile double v = value;
  6703. if (v == 0) return PUGIXML_TEXT("0");
  6704. if (v != v) return PUGIXML_TEXT("NaN");
  6705. if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6706. return 0;
  6707. #endif
  6708. }
  6709. PUGI_IMPL_FN bool convert_number_to_boolean(double value)
  6710. {
  6711. return (value != 0 && !is_nan(value));
  6712. }
  6713. PUGI_IMPL_FN void truncate_zeros(char* begin, char* end)
  6714. {
  6715. while (begin != end && end[-1] == '0') end--;
  6716. *end = 0;
  6717. }
  6718. // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
  6719. #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
  6720. PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  6721. {
  6722. // get base values
  6723. int sign, exponent;
  6724. _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
  6725. // truncate redundant zeros
  6726. truncate_zeros(buffer, buffer + strlen(buffer));
  6727. // fill results
  6728. *out_mantissa = buffer;
  6729. *out_exponent = exponent;
  6730. }
  6731. #else
  6732. PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  6733. {
  6734. // get a scientific notation value with IEEE DBL_DIG decimals
  6735. PUGI_IMPL_SNPRINTF(buffer, "%.*e", DBL_DIG, value);
  6736. // get the exponent (possibly negative)
  6737. char* exponent_string = strchr(buffer, 'e');
  6738. assert(exponent_string);
  6739. int exponent = atoi(exponent_string + 1);
  6740. // extract mantissa string: skip sign
  6741. char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
  6742. assert(mantissa[0] != '0' && (mantissa[1] == '.' || mantissa[1] == ','));
  6743. // divide mantissa by 10 to eliminate integer part
  6744. mantissa[1] = mantissa[0];
  6745. mantissa++;
  6746. exponent++;
  6747. // remove extra mantissa digits and zero-terminate mantissa
  6748. truncate_zeros(mantissa, exponent_string);
  6749. // fill results
  6750. *out_mantissa = mantissa;
  6751. *out_exponent = exponent;
  6752. }
  6753. #endif
  6754. PUGI_IMPL_FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
  6755. {
  6756. // try special number conversion
  6757. const char_t* special = convert_number_to_string_special(value);
  6758. if (special) return xpath_string::from_const(special);
  6759. // get mantissa + exponent form
  6760. char mantissa_buffer[32];
  6761. char* mantissa;
  6762. int exponent;
  6763. convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
  6764. // allocate a buffer of suitable length for the number
  6765. size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
  6766. char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
  6767. if (!result) return xpath_string();
  6768. // make the number!
  6769. char_t* s = result;
  6770. // sign
  6771. if (value < 0) *s++ = '-';
  6772. // integer part
  6773. if (exponent <= 0)
  6774. {
  6775. *s++ = '0';
  6776. }
  6777. else
  6778. {
  6779. while (exponent > 0)
  6780. {
  6781. assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
  6782. *s++ = *mantissa ? *mantissa++ : '0';
  6783. exponent--;
  6784. }
  6785. }
  6786. // fractional part
  6787. if (*mantissa)
  6788. {
  6789. // decimal point
  6790. *s++ = '.';
  6791. // extra zeroes from negative exponent
  6792. while (exponent < 0)
  6793. {
  6794. *s++ = '0';
  6795. exponent++;
  6796. }
  6797. // extra mantissa digits
  6798. while (*mantissa)
  6799. {
  6800. assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
  6801. *s++ = *mantissa++;
  6802. }
  6803. }
  6804. // zero-terminate
  6805. assert(s < result + result_size);
  6806. *s = 0;
  6807. return xpath_string::from_heap_preallocated(result, s);
  6808. }
  6809. PUGI_IMPL_FN bool check_string_to_number_format(const char_t* string)
  6810. {
  6811. // parse leading whitespace
  6812. while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string;
  6813. // parse sign
  6814. if (*string == '-') ++string;
  6815. if (!*string) return false;
  6816. // if there is no integer part, there should be a decimal part with at least one digit
  6817. if (!PUGI_IMPL_IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI_IMPL_IS_CHARTYPEX(string[1], ctx_digit))) return false;
  6818. // parse integer part
  6819. while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6820. // parse decimal part
  6821. if (*string == '.')
  6822. {
  6823. ++string;
  6824. while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6825. }
  6826. // parse trailing whitespace
  6827. while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string;
  6828. return *string == 0;
  6829. }
  6830. PUGI_IMPL_FN double convert_string_to_number(const char_t* string)
  6831. {
  6832. // check string format
  6833. if (!check_string_to_number_format(string)) return gen_nan();
  6834. // parse string
  6835. #ifdef PUGIXML_WCHAR_MODE
  6836. return wcstod(string, 0);
  6837. #else
  6838. return strtod(string, 0);
  6839. #endif
  6840. }
  6841. PUGI_IMPL_FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
  6842. {
  6843. size_t length = static_cast<size_t>(end - begin);
  6844. char_t* scratch = buffer;
  6845. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6846. {
  6847. // need to make dummy on-heap copy
  6848. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6849. if (!scratch) return false;
  6850. }
  6851. // copy string to zero-terminated buffer and perform conversion
  6852. memcpy(scratch, begin, length * sizeof(char_t));
  6853. scratch[length] = 0;
  6854. *out_result = convert_string_to_number(scratch);
  6855. // free dummy buffer
  6856. if (scratch != buffer) xml_memory::deallocate(scratch);
  6857. return true;
  6858. }
  6859. PUGI_IMPL_FN double round_nearest(double value)
  6860. {
  6861. return floor(value + 0.5);
  6862. }
  6863. PUGI_IMPL_FN double round_nearest_nzero(double value)
  6864. {
  6865. // same as round_nearest, but returns -0 for [-0.5, -0]
  6866. // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
  6867. return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
  6868. }
  6869. PUGI_IMPL_FN const char_t* qualified_name(const xpath_node& node)
  6870. {
  6871. return node.attribute() ? node.attribute().name() : node.node().name();
  6872. }
  6873. PUGI_IMPL_FN const char_t* local_name(const xpath_node& node)
  6874. {
  6875. const char_t* name = qualified_name(node);
  6876. const char_t* p = find_char(name, ':');
  6877. return p ? p + 1 : name;
  6878. }
  6879. struct namespace_uri_predicate
  6880. {
  6881. const char_t* prefix;
  6882. size_t prefix_length;
  6883. namespace_uri_predicate(const char_t* name)
  6884. {
  6885. const char_t* pos = find_char(name, ':');
  6886. prefix = pos ? name : 0;
  6887. prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
  6888. }
  6889. bool operator()(xml_attribute a) const
  6890. {
  6891. const char_t* name = a.name();
  6892. if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
  6893. return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
  6894. }
  6895. };
  6896. PUGI_IMPL_FN const char_t* namespace_uri(xml_node node)
  6897. {
  6898. namespace_uri_predicate pred = node.name();
  6899. xml_node p = node;
  6900. while (p)
  6901. {
  6902. xml_attribute a = p.find_attribute(pred);
  6903. if (a) return a.value();
  6904. p = p.parent();
  6905. }
  6906. return PUGIXML_TEXT("");
  6907. }
  6908. PUGI_IMPL_FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
  6909. {
  6910. namespace_uri_predicate pred = attr.name();
  6911. // Default namespace does not apply to attributes
  6912. if (!pred.prefix) return PUGIXML_TEXT("");
  6913. xml_node p = parent;
  6914. while (p)
  6915. {
  6916. xml_attribute a = p.find_attribute(pred);
  6917. if (a) return a.value();
  6918. p = p.parent();
  6919. }
  6920. return PUGIXML_TEXT("");
  6921. }
  6922. PUGI_IMPL_FN const char_t* namespace_uri(const xpath_node& node)
  6923. {
  6924. return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
  6925. }
  6926. PUGI_IMPL_FN char_t* normalize_space(char_t* buffer)
  6927. {
  6928. char_t* write = buffer;
  6929. for (char_t* it = buffer; *it; )
  6930. {
  6931. char_t ch = *it++;
  6932. if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
  6933. {
  6934. // replace whitespace sequence with single space
  6935. while (PUGI_IMPL_IS_CHARTYPE(*it, ct_space)) it++;
  6936. // avoid leading spaces
  6937. if (write != buffer) *write++ = ' ';
  6938. }
  6939. else *write++ = ch;
  6940. }
  6941. // remove trailing space
  6942. if (write != buffer && PUGI_IMPL_IS_CHARTYPE(write[-1], ct_space)) write--;
  6943. // zero-terminate
  6944. *write = 0;
  6945. return write;
  6946. }
  6947. PUGI_IMPL_FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
  6948. {
  6949. char_t* write = buffer;
  6950. while (*buffer)
  6951. {
  6952. PUGI_IMPL_DMC_VOLATILE char_t ch = *buffer++;
  6953. const char_t* pos = find_char(from, ch);
  6954. if (!pos)
  6955. *write++ = ch; // do not process
  6956. else if (static_cast<size_t>(pos - from) < to_length)
  6957. *write++ = to[pos - from]; // replace
  6958. }
  6959. // zero-terminate
  6960. *write = 0;
  6961. return write;
  6962. }
  6963. PUGI_IMPL_FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
  6964. {
  6965. unsigned char table[128] = {0};
  6966. while (*from)
  6967. {
  6968. unsigned int fc = static_cast<unsigned int>(*from);
  6969. unsigned int tc = static_cast<unsigned int>(*to);
  6970. if (fc >= 128 || tc >= 128)
  6971. return 0;
  6972. // code=128 means "skip character"
  6973. if (!table[fc])
  6974. table[fc] = static_cast<unsigned char>(tc ? tc : 128);
  6975. from++;
  6976. if (tc) to++;
  6977. }
  6978. for (int i = 0; i < 128; ++i)
  6979. if (!table[i])
  6980. table[i] = static_cast<unsigned char>(i);
  6981. void* result = alloc->allocate(sizeof(table));
  6982. if (!result) return 0;
  6983. memcpy(result, table, sizeof(table));
  6984. return static_cast<unsigned char*>(result);
  6985. }
  6986. PUGI_IMPL_FN char_t* translate_table(char_t* buffer, const unsigned char* table)
  6987. {
  6988. char_t* write = buffer;
  6989. while (*buffer)
  6990. {
  6991. char_t ch = *buffer++;
  6992. unsigned int index = static_cast<unsigned int>(ch);
  6993. if (index < 128)
  6994. {
  6995. unsigned char code = table[index];
  6996. // code=128 means "skip character" (table size is 128 so 128 can be a special value)
  6997. // this code skips these characters without extra branches
  6998. *write = static_cast<char_t>(code);
  6999. write += 1 - (code >> 7);
  7000. }
  7001. else
  7002. {
  7003. *write++ = ch;
  7004. }
  7005. }
  7006. // zero-terminate
  7007. *write = 0;
  7008. return write;
  7009. }
  7010. inline bool is_xpath_attribute(const char_t* name)
  7011. {
  7012. return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
  7013. }
  7014. struct xpath_variable_boolean: xpath_variable
  7015. {
  7016. xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
  7017. {
  7018. }
  7019. bool value;
  7020. char_t name[1];
  7021. };
  7022. struct xpath_variable_number: xpath_variable
  7023. {
  7024. xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
  7025. {
  7026. }
  7027. double value;
  7028. char_t name[1];
  7029. };
  7030. struct xpath_variable_string: xpath_variable
  7031. {
  7032. xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
  7033. {
  7034. }
  7035. ~xpath_variable_string()
  7036. {
  7037. if (value) xml_memory::deallocate(value);
  7038. }
  7039. char_t* value;
  7040. char_t name[1];
  7041. };
  7042. struct xpath_variable_node_set: xpath_variable
  7043. {
  7044. xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
  7045. {
  7046. }
  7047. xpath_node_set value;
  7048. char_t name[1];
  7049. };
  7050. static const xpath_node_set dummy_node_set;
  7051. PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
  7052. {
  7053. // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
  7054. unsigned int result = 0;
  7055. while (*str)
  7056. {
  7057. result += static_cast<unsigned int>(*str++);
  7058. result += result << 10;
  7059. result ^= result >> 6;
  7060. }
  7061. result += result << 3;
  7062. result ^= result >> 11;
  7063. result += result << 15;
  7064. return result;
  7065. }
  7066. template <typename T> PUGI_IMPL_FN T* new_xpath_variable(const char_t* name)
  7067. {
  7068. size_t length = strlength(name);
  7069. if (length == 0) return 0; // empty variable names are invalid
  7070. // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
  7071. void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
  7072. if (!memory) return 0;
  7073. T* result = new (memory) T();
  7074. memcpy(result->name, name, (length + 1) * sizeof(char_t));
  7075. return result;
  7076. }
  7077. PUGI_IMPL_FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
  7078. {
  7079. switch (type)
  7080. {
  7081. case xpath_type_node_set:
  7082. return new_xpath_variable<xpath_variable_node_set>(name);
  7083. case xpath_type_number:
  7084. return new_xpath_variable<xpath_variable_number>(name);
  7085. case xpath_type_string:
  7086. return new_xpath_variable<xpath_variable_string>(name);
  7087. case xpath_type_boolean:
  7088. return new_xpath_variable<xpath_variable_boolean>(name);
  7089. default:
  7090. return 0;
  7091. }
  7092. }
  7093. template <typename T> PUGI_IMPL_FN void delete_xpath_variable(T* var)
  7094. {
  7095. var->~T();
  7096. xml_memory::deallocate(var);
  7097. }
  7098. PUGI_IMPL_FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
  7099. {
  7100. switch (type)
  7101. {
  7102. case xpath_type_node_set:
  7103. delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
  7104. break;
  7105. case xpath_type_number:
  7106. delete_xpath_variable(static_cast<xpath_variable_number*>(var));
  7107. break;
  7108. case xpath_type_string:
  7109. delete_xpath_variable(static_cast<xpath_variable_string*>(var));
  7110. break;
  7111. case xpath_type_boolean:
  7112. delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
  7113. break;
  7114. default:
  7115. assert(false && "Invalid variable type"); // unreachable
  7116. }
  7117. }
  7118. PUGI_IMPL_FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
  7119. {
  7120. switch (rhs->type())
  7121. {
  7122. case xpath_type_node_set:
  7123. return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
  7124. case xpath_type_number:
  7125. return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
  7126. case xpath_type_string:
  7127. return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
  7128. case xpath_type_boolean:
  7129. return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
  7130. default:
  7131. assert(false && "Invalid variable type"); // unreachable
  7132. return false;
  7133. }
  7134. }
  7135. PUGI_IMPL_FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
  7136. {
  7137. size_t length = static_cast<size_t>(end - begin);
  7138. char_t* scratch = buffer;
  7139. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  7140. {
  7141. // need to make dummy on-heap copy
  7142. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  7143. if (!scratch) return false;
  7144. }
  7145. // copy string to zero-terminated buffer and perform lookup
  7146. memcpy(scratch, begin, length * sizeof(char_t));
  7147. scratch[length] = 0;
  7148. *out_result = set->get(scratch);
  7149. // free dummy buffer
  7150. if (scratch != buffer) xml_memory::deallocate(scratch);
  7151. return true;
  7152. }
  7153. PUGI_IMPL_NS_END
  7154. // Internal node set class
  7155. PUGI_IMPL_NS_BEGIN
  7156. PUGI_IMPL_FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
  7157. {
  7158. if (end - begin < 2)
  7159. return xpath_node_set::type_sorted;
  7160. document_order_comparator cmp;
  7161. bool first = cmp(begin[0], begin[1]);
  7162. for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
  7163. if (cmp(it[0], it[1]) != first)
  7164. return xpath_node_set::type_unsorted;
  7165. return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
  7166. }
  7167. PUGI_IMPL_FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
  7168. {
  7169. xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  7170. if (type == xpath_node_set::type_unsorted)
  7171. {
  7172. xpath_node_set::type_t sorted = xpath_get_order(begin, end);
  7173. if (sorted == xpath_node_set::type_unsorted)
  7174. {
  7175. sort(begin, end, document_order_comparator());
  7176. type = xpath_node_set::type_sorted;
  7177. }
  7178. else
  7179. type = sorted;
  7180. }
  7181. if (type != order) reverse(begin, end);
  7182. return order;
  7183. }
  7184. PUGI_IMPL_FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
  7185. {
  7186. if (begin == end) return xpath_node();
  7187. switch (type)
  7188. {
  7189. case xpath_node_set::type_sorted:
  7190. return *begin;
  7191. case xpath_node_set::type_sorted_reverse:
  7192. return *(end - 1);
  7193. case xpath_node_set::type_unsorted:
  7194. return *min_element(begin, end, document_order_comparator());
  7195. default:
  7196. assert(false && "Invalid node set type"); // unreachable
  7197. return xpath_node();
  7198. }
  7199. }
  7200. class xpath_node_set_raw
  7201. {
  7202. xpath_node_set::type_t _type;
  7203. xpath_node* _begin;
  7204. xpath_node* _end;
  7205. xpath_node* _eos;
  7206. public:
  7207. xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
  7208. {
  7209. }
  7210. xpath_node* begin() const
  7211. {
  7212. return _begin;
  7213. }
  7214. xpath_node* end() const
  7215. {
  7216. return _end;
  7217. }
  7218. bool empty() const
  7219. {
  7220. return _begin == _end;
  7221. }
  7222. size_t size() const
  7223. {
  7224. return static_cast<size_t>(_end - _begin);
  7225. }
  7226. xpath_node first() const
  7227. {
  7228. return xpath_first(_begin, _end, _type);
  7229. }
  7230. void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
  7231. void push_back(const xpath_node& node, xpath_allocator* alloc)
  7232. {
  7233. if (_end != _eos)
  7234. *_end++ = node;
  7235. else
  7236. push_back_grow(node, alloc);
  7237. }
  7238. void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
  7239. {
  7240. if (begin_ == end_) return;
  7241. size_t size_ = static_cast<size_t>(_end - _begin);
  7242. size_t capacity = static_cast<size_t>(_eos - _begin);
  7243. size_t count = static_cast<size_t>(end_ - begin_);
  7244. if (size_ + count > capacity)
  7245. {
  7246. // reallocate the old array or allocate a new one
  7247. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
  7248. if (!data) return;
  7249. // finalize
  7250. _begin = data;
  7251. _end = data + size_;
  7252. _eos = data + size_ + count;
  7253. }
  7254. memcpy(_end, begin_, count * sizeof(xpath_node));
  7255. _end += count;
  7256. }
  7257. void sort_do()
  7258. {
  7259. _type = xpath_sort(_begin, _end, _type, false);
  7260. }
  7261. void truncate(xpath_node* pos)
  7262. {
  7263. assert(_begin <= pos && pos <= _end);
  7264. _end = pos;
  7265. }
  7266. void remove_duplicates(xpath_allocator* alloc)
  7267. {
  7268. if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
  7269. {
  7270. xpath_allocator_capture cr(alloc);
  7271. size_t size_ = static_cast<size_t>(_end - _begin);
  7272. size_t hash_size = 1;
  7273. while (hash_size < size_ + size_ / 2) hash_size *= 2;
  7274. const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
  7275. if (!hash_data) return;
  7276. memset(hash_data, 0, hash_size * sizeof(const void**));
  7277. xpath_node* write = _begin;
  7278. for (xpath_node* it = _begin; it != _end; ++it)
  7279. {
  7280. const void* attr = it->attribute().internal_object();
  7281. const void* node = it->node().internal_object();
  7282. const void* key = attr ? attr : node;
  7283. if (key && hash_insert(hash_data, hash_size, key))
  7284. {
  7285. *write++ = *it;
  7286. }
  7287. }
  7288. _end = write;
  7289. }
  7290. else
  7291. {
  7292. _end = unique(_begin, _end);
  7293. }
  7294. }
  7295. xpath_node_set::type_t type() const
  7296. {
  7297. return _type;
  7298. }
  7299. void set_type(xpath_node_set::type_t value)
  7300. {
  7301. _type = value;
  7302. }
  7303. };
  7304. PUGI_IMPL_FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
  7305. {
  7306. size_t capacity = static_cast<size_t>(_eos - _begin);
  7307. // get new capacity (1.5x rule)
  7308. size_t new_capacity = capacity + capacity / 2 + 1;
  7309. // reallocate the old array or allocate a new one
  7310. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
  7311. if (!data) return;
  7312. // finalize
  7313. _begin = data;
  7314. _end = data + capacity;
  7315. _eos = data + new_capacity;
  7316. // push
  7317. *_end++ = node;
  7318. }
  7319. PUGI_IMPL_NS_END
  7320. PUGI_IMPL_NS_BEGIN
  7321. struct xpath_context
  7322. {
  7323. xpath_node n;
  7324. size_t position, size;
  7325. xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
  7326. {
  7327. }
  7328. };
  7329. enum lexeme_t
  7330. {
  7331. lex_none = 0,
  7332. lex_equal,
  7333. lex_not_equal,
  7334. lex_less,
  7335. lex_greater,
  7336. lex_less_or_equal,
  7337. lex_greater_or_equal,
  7338. lex_plus,
  7339. lex_minus,
  7340. lex_multiply,
  7341. lex_union,
  7342. lex_var_ref,
  7343. lex_open_brace,
  7344. lex_close_brace,
  7345. lex_quoted_string,
  7346. lex_number,
  7347. lex_slash,
  7348. lex_double_slash,
  7349. lex_open_square_brace,
  7350. lex_close_square_brace,
  7351. lex_string,
  7352. lex_comma,
  7353. lex_axis_attribute,
  7354. lex_dot,
  7355. lex_double_dot,
  7356. lex_double_colon,
  7357. lex_eof
  7358. };
  7359. struct xpath_lexer_string
  7360. {
  7361. const char_t* begin;
  7362. const char_t* end;
  7363. xpath_lexer_string(): begin(0), end(0)
  7364. {
  7365. }
  7366. bool operator==(const char_t* other) const
  7367. {
  7368. size_t length = static_cast<size_t>(end - begin);
  7369. return strequalrange(other, begin, length);
  7370. }
  7371. };
  7372. class xpath_lexer
  7373. {
  7374. const char_t* _cur;
  7375. const char_t* _cur_lexeme_pos;
  7376. xpath_lexer_string _cur_lexeme_contents;
  7377. lexeme_t _cur_lexeme;
  7378. public:
  7379. explicit xpath_lexer(const char_t* query): _cur(query)
  7380. {
  7381. next();
  7382. }
  7383. const char_t* state() const
  7384. {
  7385. return _cur;
  7386. }
  7387. void next()
  7388. {
  7389. const char_t* cur = _cur;
  7390. while (PUGI_IMPL_IS_CHARTYPE(*cur, ct_space)) ++cur;
  7391. // save lexeme position for error reporting
  7392. _cur_lexeme_pos = cur;
  7393. switch (*cur)
  7394. {
  7395. case 0:
  7396. _cur_lexeme = lex_eof;
  7397. break;
  7398. case '>':
  7399. if (*(cur+1) == '=')
  7400. {
  7401. cur += 2;
  7402. _cur_lexeme = lex_greater_or_equal;
  7403. }
  7404. else
  7405. {
  7406. cur += 1;
  7407. _cur_lexeme = lex_greater;
  7408. }
  7409. break;
  7410. case '<':
  7411. if (*(cur+1) == '=')
  7412. {
  7413. cur += 2;
  7414. _cur_lexeme = lex_less_or_equal;
  7415. }
  7416. else
  7417. {
  7418. cur += 1;
  7419. _cur_lexeme = lex_less;
  7420. }
  7421. break;
  7422. case '!':
  7423. if (*(cur+1) == '=')
  7424. {
  7425. cur += 2;
  7426. _cur_lexeme = lex_not_equal;
  7427. }
  7428. else
  7429. {
  7430. _cur_lexeme = lex_none;
  7431. }
  7432. break;
  7433. case '=':
  7434. cur += 1;
  7435. _cur_lexeme = lex_equal;
  7436. break;
  7437. case '+':
  7438. cur += 1;
  7439. _cur_lexeme = lex_plus;
  7440. break;
  7441. case '-':
  7442. cur += 1;
  7443. _cur_lexeme = lex_minus;
  7444. break;
  7445. case '*':
  7446. cur += 1;
  7447. _cur_lexeme = lex_multiply;
  7448. break;
  7449. case '|':
  7450. cur += 1;
  7451. _cur_lexeme = lex_union;
  7452. break;
  7453. case '$':
  7454. cur += 1;
  7455. if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol))
  7456. {
  7457. _cur_lexeme_contents.begin = cur;
  7458. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7459. if (cur[0] == ':' && PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
  7460. {
  7461. cur++; // :
  7462. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7463. }
  7464. _cur_lexeme_contents.end = cur;
  7465. _cur_lexeme = lex_var_ref;
  7466. }
  7467. else
  7468. {
  7469. _cur_lexeme = lex_none;
  7470. }
  7471. break;
  7472. case '(':
  7473. cur += 1;
  7474. _cur_lexeme = lex_open_brace;
  7475. break;
  7476. case ')':
  7477. cur += 1;
  7478. _cur_lexeme = lex_close_brace;
  7479. break;
  7480. case '[':
  7481. cur += 1;
  7482. _cur_lexeme = lex_open_square_brace;
  7483. break;
  7484. case ']':
  7485. cur += 1;
  7486. _cur_lexeme = lex_close_square_brace;
  7487. break;
  7488. case ',':
  7489. cur += 1;
  7490. _cur_lexeme = lex_comma;
  7491. break;
  7492. case '/':
  7493. if (*(cur+1) == '/')
  7494. {
  7495. cur += 2;
  7496. _cur_lexeme = lex_double_slash;
  7497. }
  7498. else
  7499. {
  7500. cur += 1;
  7501. _cur_lexeme = lex_slash;
  7502. }
  7503. break;
  7504. case '.':
  7505. if (*(cur+1) == '.')
  7506. {
  7507. cur += 2;
  7508. _cur_lexeme = lex_double_dot;
  7509. }
  7510. else if (PUGI_IMPL_IS_CHARTYPEX(*(cur+1), ctx_digit))
  7511. {
  7512. _cur_lexeme_contents.begin = cur; // .
  7513. ++cur;
  7514. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7515. _cur_lexeme_contents.end = cur;
  7516. _cur_lexeme = lex_number;
  7517. }
  7518. else
  7519. {
  7520. cur += 1;
  7521. _cur_lexeme = lex_dot;
  7522. }
  7523. break;
  7524. case '@':
  7525. cur += 1;
  7526. _cur_lexeme = lex_axis_attribute;
  7527. break;
  7528. case '"':
  7529. case '\'':
  7530. {
  7531. char_t terminator = *cur;
  7532. ++cur;
  7533. _cur_lexeme_contents.begin = cur;
  7534. while (*cur && *cur != terminator) cur++;
  7535. _cur_lexeme_contents.end = cur;
  7536. if (!*cur)
  7537. _cur_lexeme = lex_none;
  7538. else
  7539. {
  7540. cur += 1;
  7541. _cur_lexeme = lex_quoted_string;
  7542. }
  7543. break;
  7544. }
  7545. case ':':
  7546. if (*(cur+1) == ':')
  7547. {
  7548. cur += 2;
  7549. _cur_lexeme = lex_double_colon;
  7550. }
  7551. else
  7552. {
  7553. _cur_lexeme = lex_none;
  7554. }
  7555. break;
  7556. default:
  7557. if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit))
  7558. {
  7559. _cur_lexeme_contents.begin = cur;
  7560. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7561. if (*cur == '.')
  7562. {
  7563. cur++;
  7564. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7565. }
  7566. _cur_lexeme_contents.end = cur;
  7567. _cur_lexeme = lex_number;
  7568. }
  7569. else if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol))
  7570. {
  7571. _cur_lexeme_contents.begin = cur;
  7572. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7573. if (cur[0] == ':')
  7574. {
  7575. if (cur[1] == '*') // namespace test ncname:*
  7576. {
  7577. cur += 2; // :*
  7578. }
  7579. else if (PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
  7580. {
  7581. cur++; // :
  7582. while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7583. }
  7584. }
  7585. _cur_lexeme_contents.end = cur;
  7586. _cur_lexeme = lex_string;
  7587. }
  7588. else
  7589. {
  7590. _cur_lexeme = lex_none;
  7591. }
  7592. }
  7593. _cur = cur;
  7594. }
  7595. lexeme_t current() const
  7596. {
  7597. return _cur_lexeme;
  7598. }
  7599. const char_t* current_pos() const
  7600. {
  7601. return _cur_lexeme_pos;
  7602. }
  7603. const xpath_lexer_string& contents() const
  7604. {
  7605. assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
  7606. return _cur_lexeme_contents;
  7607. }
  7608. };
  // Kinds of XPath AST node. xpath_ast_node stores one of these values (narrowed to char) in _type.
  // In the per-entry comments, "left" and "right" name the node's operand sub-expressions.
  7609. enum ast_type_t
  7610. {
  7611. ast_unknown,
  7612. ast_op_or, // left or right
  7613. ast_op_and, // left and right
  7614. ast_op_equal, // left = right
  7615. ast_op_not_equal, // left != right
  7616. ast_op_less, // left < right
  7617. ast_op_greater, // left > right
  7618. ast_op_less_or_equal, // left <= right
  7619. ast_op_greater_or_equal, // left >= right
  7620. ast_op_add, // left + right
  7621. ast_op_subtract, // left - right
  7622. ast_op_multiply, // left * right
  7623. ast_op_divide, // left / right
  7624. ast_op_mod, // left mod right (evaluated with fmod)
  7625. ast_op_negate, // -left (unary minus)
  7626. ast_op_union, // left | right
  7627. ast_predicate, // apply predicate to set; next points to next predicate
  7628. ast_filter, // select * from left where right
  7629. ast_string_constant, // string constant
  7630. ast_number_constant, // number constant
  7631. ast_variable, // variable
  7632. ast_func_last, // last()
  7633. ast_func_position, // position()
  7634. ast_func_count, // count(left)
  7635. ast_func_id, // id(left)
  7636. ast_func_local_name_0, // local-name()
  7637. ast_func_local_name_1, // local-name(left)
  7638. ast_func_namespace_uri_0, // namespace-uri()
  7639. ast_func_namespace_uri_1, // namespace-uri(left)
  7640. ast_func_name_0, // name()
  7641. ast_func_name_1, // name(left)
  7642. ast_func_string_0, // string()
  7643. ast_func_string_1, // string(left)
  7644. ast_func_concat, // concat(left, right, siblings)
  7645. ast_func_starts_with, // starts-with(left, right)
  7646. ast_func_contains, // contains(left, right)
  7647. ast_func_substring_before, // substring-before(left, right)
  7648. ast_func_substring_after, // substring-after(left, right)
  7649. ast_func_substring_2, // substring(left, right)
  7650. ast_func_substring_3, // substring(left, right, third)
  7651. ast_func_string_length_0, // string-length()
  7652. ast_func_string_length_1, // string-length(left)
  7653. ast_func_normalize_space_0, // normalize-space()
  7654. ast_func_normalize_space_1, // normalize-space(left)
  7655. ast_func_translate, // translate(left, right, third)
  7656. ast_func_boolean, // boolean(left)
  7657. ast_func_not, // not(left)
  7658. ast_func_true, // true()
  7659. ast_func_false, // false()
  7660. ast_func_lang, // lang(left)
  7661. ast_func_number_0, // number()
  7662. ast_func_number_1, // number(left)
  7663. ast_func_sum, // sum(left)
  7664. ast_func_floor, // floor(left)
  7665. ast_func_ceiling, // ceiling(left)
  7666. ast_func_round, // round(left)
  7667. ast_step, // process set left with step
  7668. ast_step_root, // select root node
  7669. ast_opt_translate_table, // translate(left, right, third) where right/third are constants
  7670. ast_opt_compare_attribute // @name = 'string'
  7671. };
  // XPath location-step axes. xpath_ast_node stores the value (narrowed to char) in _axis for ast_step nodes,
  // and axis_to_type<N> carries it as a compile-time tag into step_fill/step_do.
  // ancestor, ancestor-or-self, preceding and preceding-sibling are treated as reverse axes by step_do.
  7672. enum axis_t
  7673. {
  7674. axis_ancestor,
  7675. axis_ancestor_or_self,
  7676. axis_attribute,
  7677. axis_child,
  7678. axis_descendant,
  7679. axis_descendant_or_self,
  7680. axis_following,
  7681. axis_following_sibling,
  7682. axis_namespace,
  7683. axis_parent,
  7684. axis_preceding,
  7685. axis_preceding_sibling,
  7686. axis_self
  7687. };
  // Node tests applied by step_push to decide whether a candidate node/attribute joins the result set.
  // Stored (narrowed to char) in xpath_ast_node::_test for ast_step nodes.
  7688. enum nodetest_t
  7689. {
  7690. nodetest_none,
  7691. nodetest_name, // name equals _data.nodetest (elements, or attributes on the attribute overload)
  7692. nodetest_type_node, // any node
  7693. nodetest_type_comment, // comment nodes only
  7694. nodetest_type_pi, // any processing instruction
  7695. nodetest_type_text, // pcdata or cdata nodes
  7696. nodetest_pi, // processing instruction whose name equals _data.nodetest
  7697. nodetest_all, // any element (or any XPath-visible attribute)
  7698. nodetest_all_in_namespace // name starts with the prefix held in _data.nodetest
  7699. };
  // Classification of a predicate/filter expression, stored in xpath_ast_node::_test for
  // ast_filter/ast_predicate nodes and used to pick a cheaper evaluation strategy.
  7700. enum predicate_t
  7701. {
  7702. predicate_default, // no special handling: evaluated per node as a number or boolean test
  7703. predicate_posinv, // NOTE(review): presumably "position invariant"; not consulted in the code visible here
  7704. predicate_constant, // evaluated once via apply_predicate_number_const
  7705. predicate_constant_one // as predicate_constant; additionally lets step_do stop after the first pushed node
  7706. };
  // How much of a node set the caller needs; eval_once() uses this to decide whether evaluation may stop early.
  7707. enum nodeset_eval_t
  7708. {
  7709. nodeset_eval_all, // the complete set is required
  7710. nodeset_eval_any, // any single node suffices (used by eval_boolean for the emptiness check)
  7711. nodeset_eval_first // NOTE(review): presumably the first node in document order; early exit only for sorted sets
  7712. };
  // Tag type that carries an axis_t value as a template argument, so that step_fill/step_do can read it
  // as the constant T::axis.
  7713. template <axis_t N> struct axis_to_type
  7714. {
  7715. static const axis_t axis;
  7716. };
  // Out-of-class definition of the static member, initialized with the template argument.
  7717. template <axis_t N> const axis_t axis_to_type<N>::axis = N;
  7718. class xpath_ast_node
  7719. {
  7720. private:
  7721. // node type
  // (an ast_type_t value narrowed to char by the constructors)
  7722. char _type;
  // result type of the expression (an xpath_value_type value narrowed to char)
  7723. char _rettype;
  7724. // for ast_step
  // (an axis_t value; 0 for other node kinds)
  7725. char _axis;
  7726. // for ast_step/ast_predicate/ast_filter
  // (a nodetest_t for ast_step, a predicate_t for ast_filter/ast_predicate)
  7727. char _test;
  7728. // tree node structure
  7729. xpath_ast_node* _left;
  7730. xpath_ast_node* _right;
  // link to the next node in a chain (e.g. the next predicate of a step)
  7731. xpath_ast_node* _next;
  // payload; which member is valid depends on _type
  7732. union
  7733. {
  7734. // value for ast_string_constant
  7735. const char_t* string;
  7736. // value for ast_number_constant
  7737. double number;
  7738. // variable for ast_variable
  7739. xpath_variable* variable;
  7740. // node test for ast_step (node name/namespace/node type/pi target)
  7741. const char_t* nodetest;
  7742. // table for ast_opt_translate_table
  7743. const unsigned char* table;
  7744. } _data;
  // copy construction and assignment are declared private (no definition is visible here), so nodes are non-copyable
  7745. xpath_ast_node(const xpath_ast_node&);
  7746. xpath_ast_node& operator=(const xpath_ast_node&);
  7747. template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7748. {
  7749. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7750. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7751. {
  7752. if (lt == xpath_type_boolean || rt == xpath_type_boolean)
  7753. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7754. else if (lt == xpath_type_number || rt == xpath_type_number)
  7755. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7756. else if (lt == xpath_type_string || rt == xpath_type_string)
  7757. {
  7758. xpath_allocator_capture cr(stack.result);
  7759. xpath_string ls = lhs->eval_string(c, stack);
  7760. xpath_string rs = rhs->eval_string(c, stack);
  7761. return comp(ls, rs);
  7762. }
  7763. }
  7764. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7765. {
  7766. xpath_allocator_capture cr(stack.result);
  7767. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7768. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7769. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7770. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7771. {
  7772. xpath_allocator_capture cri(stack.result);
  7773. if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
  7774. return true;
  7775. }
  7776. return false;
  7777. }
  7778. else
  7779. {
  7780. if (lt == xpath_type_node_set)
  7781. {
  7782. swap(lhs, rhs);
  7783. swap(lt, rt);
  7784. }
  7785. if (lt == xpath_type_boolean)
  7786. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7787. else if (lt == xpath_type_number)
  7788. {
  7789. xpath_allocator_capture cr(stack.result);
  7790. double l = lhs->eval_number(c, stack);
  7791. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7792. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7793. {
  7794. xpath_allocator_capture cri(stack.result);
  7795. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7796. return true;
  7797. }
  7798. return false;
  7799. }
  7800. else if (lt == xpath_type_string)
  7801. {
  7802. xpath_allocator_capture cr(stack.result);
  7803. xpath_string l = lhs->eval_string(c, stack);
  7804. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7805. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7806. {
  7807. xpath_allocator_capture cri(stack.result);
  7808. if (comp(l, string_value(*ri, stack.result)))
  7809. return true;
  7810. }
  7811. return false;
  7812. }
  7813. }
  7814. assert(false && "Wrong types"); // unreachable
  7815. return false;
  7816. }
  7817. static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
  7818. {
  7819. return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
  7820. }
  7821. template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7822. {
  7823. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7824. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7825. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7826. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7827. {
  7828. xpath_allocator_capture cr(stack.result);
  7829. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7830. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7831. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7832. {
  7833. xpath_allocator_capture cri(stack.result);
  7834. double l = convert_string_to_number(string_value(*li, stack.result).c_str());
  7835. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7836. {
  7837. xpath_allocator_capture crii(stack.result);
  7838. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7839. return true;
  7840. }
  7841. }
  7842. return false;
  7843. }
  7844. else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
  7845. {
  7846. xpath_allocator_capture cr(stack.result);
  7847. double l = lhs->eval_number(c, stack);
  7848. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7849. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7850. {
  7851. xpath_allocator_capture cri(stack.result);
  7852. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7853. return true;
  7854. }
  7855. return false;
  7856. }
  7857. else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
  7858. {
  7859. xpath_allocator_capture cr(stack.result);
  7860. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7861. double r = rhs->eval_number(c, stack);
  7862. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7863. {
  7864. xpath_allocator_capture cri(stack.result);
  7865. if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
  7866. return true;
  7867. }
  7868. return false;
  7869. }
  7870. else
  7871. {
  7872. assert(false && "Wrong types"); // unreachable
  7873. return false;
  7874. }
  7875. }
  7876. static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7877. {
  7878. assert(ns.size() >= first);
  7879. assert(expr->rettype() != xpath_type_number);
  7880. size_t i = 1;
  7881. size_t size = ns.size() - first;
  7882. xpath_node* last = ns.begin() + first;
  7883. // remove_if... or well, sort of
  7884. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7885. {
  7886. xpath_context c(*it, i, size);
  7887. if (expr->eval_boolean(c, stack))
  7888. {
  7889. *last++ = *it;
  7890. if (once) break;
  7891. }
  7892. }
  7893. ns.truncate(last);
  7894. }
  7895. static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7896. {
  7897. assert(ns.size() >= first);
  7898. assert(expr->rettype() == xpath_type_number);
  7899. size_t i = 1;
  7900. size_t size = ns.size() - first;
  7901. xpath_node* last = ns.begin() + first;
  7902. // remove_if... or well, sort of
  7903. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7904. {
  7905. xpath_context c(*it, i, size);
  7906. if (expr->eval_number(c, stack) == static_cast<double>(i))
  7907. {
  7908. *last++ = *it;
  7909. if (once) break;
  7910. }
  7911. }
  7912. ns.truncate(last);
  7913. }
  7914. static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
  7915. {
  7916. assert(ns.size() >= first);
  7917. assert(expr->rettype() == xpath_type_number);
  7918. size_t size = ns.size() - first;
  7919. xpath_node* last = ns.begin() + first;
  7920. xpath_node cn;
  7921. xpath_context c(cn, 1, size);
  7922. double er = expr->eval_number(c, stack);
  7923. if (er >= 1.0 && er <= static_cast<double>(size))
  7924. {
  7925. size_t eri = static_cast<size_t>(er);
  7926. if (er == static_cast<double>(eri))
  7927. {
  7928. xpath_node r = last[eri - 1];
  7929. *last++ = r;
  7930. }
  7931. }
  7932. ns.truncate(last);
  7933. }
  7934. void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
  7935. {
  7936. if (ns.size() == first) return;
  7937. assert(_type == ast_filter || _type == ast_predicate);
  7938. if (_test == predicate_constant || _test == predicate_constant_one)
  7939. apply_predicate_number_const(ns, first, _right, stack);
  7940. else if (_right->rettype() == xpath_type_number)
  7941. apply_predicate_number(ns, first, _right, stack, once);
  7942. else
  7943. apply_predicate_boolean(ns, first, _right, stack, once);
  7944. }
  7945. void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
  7946. {
  7947. if (ns.size() == first) return;
  7948. bool last_once = eval_once(ns.type(), eval);
  7949. for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
  7950. pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
  7951. }
  7952. bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
  7953. {
  7954. assert(a);
  7955. const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
  7956. switch (_test)
  7957. {
  7958. case nodetest_name:
  7959. if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
  7960. {
  7961. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7962. return true;
  7963. }
  7964. break;
  7965. case nodetest_type_node:
  7966. case nodetest_all:
  7967. if (is_xpath_attribute(name))
  7968. {
  7969. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7970. return true;
  7971. }
  7972. break;
  7973. case nodetest_all_in_namespace:
  7974. if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
  7975. {
  7976. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7977. return true;
  7978. }
  7979. break;
  7980. default:
  7981. ;
  7982. }
  7983. return false;
  7984. }
  7985. bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
  7986. {
  7987. assert(n);
  7988. xml_node_type type = PUGI_IMPL_NODETYPE(n);
  7989. switch (_test)
  7990. {
  7991. case nodetest_name:
  7992. if (type == node_element && n->name && strequal(n->name, _data.nodetest))
  7993. {
  7994. ns.push_back(xml_node(n), alloc);
  7995. return true;
  7996. }
  7997. break;
  7998. case nodetest_type_node:
  7999. ns.push_back(xml_node(n), alloc);
  8000. return true;
  8001. case nodetest_type_comment:
  8002. if (type == node_comment)
  8003. {
  8004. ns.push_back(xml_node(n), alloc);
  8005. return true;
  8006. }
  8007. break;
  8008. case nodetest_type_text:
  8009. if (type == node_pcdata || type == node_cdata)
  8010. {
  8011. ns.push_back(xml_node(n), alloc);
  8012. return true;
  8013. }
  8014. break;
  8015. case nodetest_type_pi:
  8016. if (type == node_pi)
  8017. {
  8018. ns.push_back(xml_node(n), alloc);
  8019. return true;
  8020. }
  8021. break;
  8022. case nodetest_pi:
  8023. if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
  8024. {
  8025. ns.push_back(xml_node(n), alloc);
  8026. return true;
  8027. }
  8028. break;
  8029. case nodetest_all:
  8030. if (type == node_element)
  8031. {
  8032. ns.push_back(xml_node(n), alloc);
  8033. return true;
  8034. }
  8035. break;
  8036. case nodetest_all_in_namespace:
  8037. if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
  8038. {
  8039. ns.push_back(xml_node(n), alloc);
  8040. return true;
  8041. }
  8042. break;
  8043. default:
  8044. assert(false && "Unknown axis"); // unreachable
  8045. }
  8046. return false;
  8047. }
  // Node overload: walks from node n along the axis given by T::axis and offers every visited node
  // (or attribute, for the attribute axis) to step_push, which appends those passing the node test to ns.
  // When `once` is true the function returns as soon as one node has been appended; the self and
  // parent axes visit at most one node and ignore `once`.
  // step_push returns bool and is combined with `once` using bitwise &, so both operands are always evaluated.
  8048. template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
  8049. {
  8050. const axis_t axis = T::axis;
  8051. switch (axis)
  8052. {
  8053. case axis_attribute:
  8054. {
  // attributes of n, in list order
  8055. for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
  8056. if (step_push(ns, a, n, alloc) & once)
  8057. return;
  8058. break;
  8059. }
  8060. case axis_child:
  8061. {
  // direct children of n, first to last
  8062. for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
  8063. if (step_push(ns, c, alloc) & once)
  8064. return;
  8065. break;
  8066. }
  8067. case axis_descendant:
  8068. case axis_descendant_or_self:
  8069. {
  8070. if (axis == axis_descendant_or_self)
  8071. if (step_push(ns, n, alloc) & once)
  8072. return;
  // pre-order walk of the subtree below n: descend to the first child when there is one,
  // otherwise climb until a next sibling exists; the walk ends when the climb reaches n again
  8073. xml_node_struct* cur = n->first_child;
  8074. while (cur)
  8075. {
  8076. if (step_push(ns, cur, alloc) & once)
  8077. return;
  8078. if (cur->first_child)
  8079. cur = cur->first_child;
  8080. else
  8081. {
  8082. while (!cur->next_sibling)
  8083. {
  8084. cur = cur->parent;
  8085. if (cur == n) return;
  8086. }
  8087. cur = cur->next_sibling;
  8088. }
  8089. }
  8090. break;
  8091. }
  8092. case axis_following_sibling:
  8093. {
  8094. for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
  8095. if (step_push(ns, c, alloc) & once)
  8096. return;
  8097. break;
  8098. }
  8099. case axis_preceding_sibling:
  8100. {
  // walks backwards through prev_sibling_c and stops at a node that has no next_sibling
  // NOTE(review): this relies on prev_sibling_c being a cyclic link (the first child's link
  // pointing at the last child) - confirm against the xml_node_struct definition
  8101. for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
  8102. if (step_push(ns, c, alloc) & once)
  8103. return;
  8104. break;
  8105. }
  8106. case axis_following:
  8107. {
  8108. xml_node_struct* cur = n;
  8109. // exit from this node so that we don't include descendants
  8110. while (!cur->next_sibling)
  8111. {
  8112. cur = cur->parent;
  8113. if (!cur) return;
  8114. }
  8115. cur = cur->next_sibling;
  // from here on: pre-order walk over everything that follows, until the climb runs past the root
  8116. while (cur)
  8117. {
  8118. if (step_push(ns, cur, alloc) & once)
  8119. return;
  8120. if (cur->first_child)
  8121. cur = cur->first_child;
  8122. else
  8123. {
  8124. while (!cur->next_sibling)
  8125. {
  8126. cur = cur->parent;
  8127. if (!cur) return;
  8128. }
  8129. cur = cur->next_sibling;
  8130. }
  8131. }
  8132. break;
  8133. }
  8134. case axis_preceding:
  8135. {
  8136. xml_node_struct* cur = n;
  8137. // exit from this node so that we don't include descendants
  // (climbs while the node reached through prev_sibling_c has no next_sibling - see the
  // cyclic-link note on the preceding-sibling axis)
  8138. while (!cur->prev_sibling_c->next_sibling)
  8139. {
  8140. cur = cur->parent;
  8141. if (!cur) return;
  8142. }
  8143. cur = cur->prev_sibling_c;
  // reverse walk: descend to the last child first, push leaves, and push a parent only after
  // all of its children were visited and only if it is not an ancestor of n
  8144. while (cur)
  8145. {
  8146. if (cur->first_child)
  8147. cur = cur->first_child->prev_sibling_c;
  8148. else
  8149. {
  8150. // leaf node, can't be ancestor
  8151. if (step_push(ns, cur, alloc) & once)
  8152. return;
  8153. while (!cur->prev_sibling_c->next_sibling)
  8154. {
  8155. cur = cur->parent;
  8156. if (!cur) return;
  8157. if (!node_is_ancestor(cur, n))
  8158. if (step_push(ns, cur, alloc) & once)
  8159. return;
  8160. }
  8161. cur = cur->prev_sibling_c;
  8162. }
  8163. }
  8164. break;
  8165. }
  8166. case axis_ancestor:
  8167. case axis_ancestor_or_self:
  8168. {
  8169. if (axis == axis_ancestor_or_self)
  8170. if (step_push(ns, n, alloc) & once)
  8171. return;
  // parent chain, nearest ancestor first
  8172. xml_node_struct* cur = n->parent;
  8173. while (cur)
  8174. {
  8175. if (step_push(ns, cur, alloc) & once)
  8176. return;
  8177. cur = cur->parent;
  8178. }
  8179. break;
  8180. }
  8181. case axis_self:
  8182. {
  8183. step_push(ns, n, alloc);
  8184. break;
  8185. }
  8186. case axis_parent:
  8187. {
  8188. if (n->parent)
  8189. step_push(ns, n->parent, alloc);
  8190. break;
  8191. }
  8192. default:
  8193. assert(false && "Unimplemented axis"); // unreachable
  8194. }
  8195. }
  8196. template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
  8197. {
  8198. const axis_t axis = T::axis;
  8199. switch (axis)
  8200. {
  8201. case axis_ancestor:
  8202. case axis_ancestor_or_self:
  8203. {
  8204. if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
  8205. if (step_push(ns, a, p, alloc) & once)
  8206. return;
  8207. xml_node_struct* cur = p;
  8208. while (cur)
  8209. {
  8210. if (step_push(ns, cur, alloc) & once)
  8211. return;
  8212. cur = cur->parent;
  8213. }
  8214. break;
  8215. }
  8216. case axis_descendant_or_self:
  8217. case axis_self:
  8218. {
  8219. if (_test == nodetest_type_node) // reject attributes based on principal node type test
  8220. step_push(ns, a, p, alloc);
  8221. break;
  8222. }
  8223. case axis_following:
  8224. {
  8225. xml_node_struct* cur = p;
  8226. while (cur)
  8227. {
  8228. if (cur->first_child)
  8229. cur = cur->first_child;
  8230. else
  8231. {
  8232. while (!cur->next_sibling)
  8233. {
  8234. cur = cur->parent;
  8235. if (!cur) return;
  8236. }
  8237. cur = cur->next_sibling;
  8238. }
  8239. if (step_push(ns, cur, alloc) & once)
  8240. return;
  8241. }
  8242. break;
  8243. }
  8244. case axis_parent:
  8245. {
  8246. step_push(ns, p, alloc);
  8247. break;
  8248. }
  8249. case axis_preceding:
  8250. {
  8251. // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
  8252. step_fill(ns, p, alloc, once, v);
  8253. break;
  8254. }
  8255. default:
  8256. assert(false && "Unimplemented axis"); // unreachable
  8257. }
  8258. }
  8259. template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
  8260. {
  8261. const axis_t axis = T::axis;
  8262. const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
  8263. if (xn.node())
  8264. step_fill(ns, xn.node().internal_object(), alloc, once, v);
  8265. else if (axis_has_attributes && xn.attribute() && xn.parent())
  8266. step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
  8267. }
  8268. template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
  8269. {
  8270. const axis_t axis = T::axis;
  8271. const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
  8272. const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  8273. bool once =
  8274. (axis == axis_attribute && _test == nodetest_name) ||
  8275. (!_right && eval_once(axis_type, eval)) ||
  8276. // coverity[mixed_enums]
  8277. (_right && !_right->_next && _right->_test == predicate_constant_one);
  8278. xpath_node_set_raw ns;
  8279. ns.set_type(axis_type);
  8280. if (_left)
  8281. {
  8282. xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
  8283. // self axis preserves the original order
  8284. if (axis == axis_self) ns.set_type(s.type());
  8285. for (const xpath_node* it = s.begin(); it != s.end(); ++it)
  8286. {
  8287. size_t size = ns.size();
  8288. // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
  8289. if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
  8290. step_fill(ns, *it, stack.result, once, v);
  8291. if (_right) apply_predicates(ns, size, stack, eval);
  8292. }
  8293. }
  8294. else
  8295. {
  8296. step_fill(ns, c.n, stack.result, once, v);
  8297. if (_right) apply_predicates(ns, 0, stack, eval);
  8298. }
  8299. // child, attribute and self axes always generate unique set of nodes
  8300. // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
  8301. if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
  8302. ns.remove_duplicates(stack.temp);
  8303. return ns;
  8304. }
  8305. public:
  8306. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
  8307. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8308. {
  8309. assert(type == ast_string_constant);
  8310. _data.string = value;
  8311. }
  8312. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
  8313. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8314. {
  8315. assert(type == ast_number_constant);
  8316. _data.number = value;
  8317. }
  8318. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
  8319. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8320. {
  8321. assert(type == ast_variable);
  8322. _data.variable = value;
  8323. }
  8324. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
  8325. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
  8326. {
  8327. }
  8328. xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
  8329. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
  8330. {
  8331. assert(type == ast_step);
  8332. _data.nodetest = contents;
  8333. }
  8334. xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
  8335. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
  8336. {
  8337. assert(type == ast_filter || type == ast_predicate);
  8338. }
  8339. void set_next(xpath_ast_node* value)
  8340. {
  8341. _next = value;
  8342. }
  8343. void set_right(xpath_ast_node* value)
  8344. {
  8345. _right = value;
  8346. }
  8347. bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
  8348. {
  8349. switch (_type)
  8350. {
  8351. case ast_op_or:
  8352. return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
  8353. case ast_op_and:
  8354. return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
  8355. case ast_op_equal:
  8356. return compare_eq(_left, _right, c, stack, equal_to());
  8357. case ast_op_not_equal:
  8358. return compare_eq(_left, _right, c, stack, not_equal_to());
  8359. case ast_op_less:
  8360. return compare_rel(_left, _right, c, stack, less());
  8361. case ast_op_greater:
  8362. return compare_rel(_right, _left, c, stack, less());
  8363. case ast_op_less_or_equal:
  8364. return compare_rel(_left, _right, c, stack, less_equal());
  8365. case ast_op_greater_or_equal:
  8366. return compare_rel(_right, _left, c, stack, less_equal());
  8367. case ast_func_starts_with:
  8368. {
  8369. xpath_allocator_capture cr(stack.result);
  8370. xpath_string lr = _left->eval_string(c, stack);
  8371. xpath_string rr = _right->eval_string(c, stack);
  8372. return starts_with(lr.c_str(), rr.c_str());
  8373. }
  8374. case ast_func_contains:
  8375. {
  8376. xpath_allocator_capture cr(stack.result);
  8377. xpath_string lr = _left->eval_string(c, stack);
  8378. xpath_string rr = _right->eval_string(c, stack);
  8379. return find_substring(lr.c_str(), rr.c_str()) != 0;
  8380. }
  8381. case ast_func_boolean:
  8382. return _left->eval_boolean(c, stack);
  8383. case ast_func_not:
  8384. return !_left->eval_boolean(c, stack);
  8385. case ast_func_true:
  8386. return true;
  8387. case ast_func_false:
  8388. return false;
  8389. case ast_func_lang:
  8390. {
  8391. if (c.n.attribute()) return false;
  8392. xpath_allocator_capture cr(stack.result);
  8393. xpath_string lang = _left->eval_string(c, stack);
  8394. for (xml_node n = c.n.node(); n; n = n.parent())
  8395. {
  8396. xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
  8397. if (a)
  8398. {
  8399. const char_t* value = a.value();
  8400. // strnicmp / strncasecmp is not portable
  8401. for (const char_t* lit = lang.c_str(); *lit; ++lit)
  8402. {
  8403. if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
  8404. ++value;
  8405. }
  8406. return *value == 0 || *value == '-';
  8407. }
  8408. }
  8409. return false;
  8410. }
  8411. case ast_opt_compare_attribute:
  8412. {
  8413. const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
  8414. xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
  8415. return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
  8416. }
  8417. case ast_variable:
  8418. {
  8419. assert(_rettype == _data.variable->type());
  8420. if (_rettype == xpath_type_boolean)
  8421. return _data.variable->get_boolean();
  8422. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8423. break;
  8424. }
  8425. default:
  8426. ;
  8427. }
  8428. // none of the ast types that return the value directly matched, we need to perform type conversion
  8429. switch (_rettype)
  8430. {
  8431. case xpath_type_number:
  8432. return convert_number_to_boolean(eval_number(c, stack));
  8433. case xpath_type_string:
  8434. {
  8435. xpath_allocator_capture cr(stack.result);
  8436. return !eval_string(c, stack).empty();
  8437. }
  8438. case xpath_type_node_set:
  8439. {
  8440. xpath_allocator_capture cr(stack.result);
  8441. return !eval_node_set(c, stack, nodeset_eval_any).empty();
  8442. }
  8443. default:
  8444. assert(false && "Wrong expression for return type boolean"); // unreachable
  8445. return false;
  8446. }
  8447. }
  // Evaluates this AST node in context c and returns the result as a double.
  //
  // The first switch handles the node types that produce a number directly.
  // Every other node type leaves that switch and reaches the second one, which
  // evaluates the node according to its declared _rettype and converts that
  // value to a number.
  //
  // c     - evaluation context (context node c.n, c.position, c.size)
  // stack - allocator pair used for any temporary strings / node sets
  8448. double eval_number(const xpath_context& c, const xpath_stack& stack)
  8449. {
  8450. switch (_type)
  8451. {
  // Binary arithmetic: both operands are evaluated as numbers.
  8452. case ast_op_add:
  8453. return _left->eval_number(c, stack) + _right->eval_number(c, stack);
  8454. case ast_op_subtract:
  8455. return _left->eval_number(c, stack) - _right->eval_number(c, stack);
  8456. case ast_op_multiply:
  8457. return _left->eval_number(c, stack) * _right->eval_number(c, stack);
  8458. case ast_op_divide:
  8459. return _left->eval_number(c, stack) / _right->eval_number(c, stack);
  8460. case ast_op_mod:
  8461. return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
  8462. case ast_op_negate:
  8463. return -_left->eval_number(c, stack);
  8464. case ast_number_constant:
  8465. return _data.number;
  // last() and position() read their value straight from the context.
  8466. case ast_func_last:
  8467. return static_cast<double>(c.size);
  8468. case ast_func_position:
  8469. return static_cast<double>(c.position);
  8470. case ast_func_count:
  8471. {
  // NOTE(review): xpath_allocator_capture presumably restores stack.result when it
  // leaves scope, so the node set built here is only needed for its size - confirm.
  8472. xpath_allocator_capture cr(stack.result);
  8473. return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
  8474. }
  // string-length() without arguments measures the string value of the context node.
  8475. case ast_func_string_length_0:
  8476. {
  8477. xpath_allocator_capture cr(stack.result);
  8478. return static_cast<double>(string_value(c.n, stack.result).length());
  8479. }
  8480. case ast_func_string_length_1:
  8481. {
  8482. xpath_allocator_capture cr(stack.result);
  8483. return static_cast<double>(_left->eval_string(c, stack).length());
  8484. }
  // number() without arguments converts the string value of the context node.
  8485. case ast_func_number_0:
  8486. {
  8487. xpath_allocator_capture cr(stack.result);
  8488. return convert_string_to_number(string_value(c.n, stack.result).c_str());
  8489. }
  8490. case ast_func_number_1:
  8491. return _left->eval_number(c, stack);
  // sum(): every node of the set is converted through its string value and accumulated.
  8492. case ast_func_sum:
  8493. {
  8494. xpath_allocator_capture cr(stack.result);
  8495. double r = 0;
  8496. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
  8497. for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
  8498. {
  // a separate capture per iteration scopes each node's temporary string value
  8499. xpath_allocator_capture cri(stack.result);
  8500. r += convert_string_to_number(string_value(*it, stack.result).c_str());
  8501. }
  8502. return r;
  8503. }
  8504. case ast_func_floor:
  8505. {
  8506. double r = _left->eval_number(c, stack);
  // r == r is false only for NaN, which is returned unchanged instead of being passed to floor()
  8507. return r == r ? floor(r) : r;
  8508. }
  8509. case ast_func_ceiling:
  8510. {
  8511. double r = _left->eval_number(c, stack);
  // same NaN guard as for floor()
  8512. return r == r ? ceil(r) : r;
  8513. }
  8514. case ast_func_round:
  8515. return round_nearest_nzero(_left->eval_number(c, stack));
  8516. case ast_variable:
  8517. {
  8518. assert(_rettype == _data.variable->type());
  8519. if (_rettype == xpath_type_number)
  8520. return _data.variable->get_number();
  8521. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8522. break;
  8523. }
  8524. default:
  8525. ;
  8526. }
  8527. // none of the ast types that return the value directly matched, we need to perform type conversion
  8528. switch (_rettype)
  8529. {
  // boolean -> 1 for true, 0 for false
  8530. case xpath_type_boolean:
  8531. return eval_boolean(c, stack) ? 1 : 0;
  8532. case xpath_type_string:
  8533. {
  8534. xpath_allocator_capture cr(stack.result);
  8535. return convert_string_to_number(eval_string(c, stack).c_str());
  8536. }
  // node sets take the same route as strings: eval_string() first, then string -> number
  8537. case xpath_type_node_set:
  8538. {
  8539. xpath_allocator_capture cr(stack.result);
  8540. return convert_string_to_number(eval_string(c, stack).c_str());
  8541. }
  8542. default:
  8543. assert(false && "Wrong expression for return type number"); // unreachable
  8544. return 0;
  8545. }
  8546. }
  8547. xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
  8548. {
  8549. assert(_type == ast_func_concat);
  8550. xpath_allocator_capture ct(stack.temp);
  8551. // count the string number
  8552. size_t count = 1;
  8553. for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
  8554. // allocate a buffer for temporary string objects
  8555. xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
  8556. if (!buffer) return xpath_string();
  8557. // evaluate all strings to temporary stack
  8558. xpath_stack swapped_stack = {stack.temp, stack.result};
  8559. buffer[0] = _left->eval_string(c, swapped_stack);
  8560. size_t pos = 1;
  8561. for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
  8562. assert(pos == count);
  8563. // get total length
  8564. size_t length = 0;
  8565. for (size_t i = 0; i < count; ++i) length += buffer[i].length();
  8566. // create final string
  8567. char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
  8568. if (!result) return xpath_string();
  8569. char_t* ri = result;
  8570. for (size_t j = 0; j < count; ++j)
  8571. for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
  8572. *ri++ = *bi;
  8573. *ri = 0;
  8574. return xpath_string::from_heap_preallocated(result, ri);
  8575. }
  // Evaluates this AST node in context c and returns the result as an xpath_string.
  //
  // The first switch handles node types that produce a string directly. Any other
  // node type reaches the second switch, which evaluates the node according to its
  // declared _rettype and converts that value to a string.
  //
  // Several cases build `swapped_stack` from {stack.temp, stack.result} and evaluate
  // operands against it, then create the returned string from stack.result.
  // NOTE(review): this presumably exchanges the roles of the two allocators so that
  // intermediate strings can be released by the capture on stack.temp - confirm
  // against the xpath_stack definition.
  8576. xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
  8577. {
  8578. switch (_type)
  8579. {
  8580. case ast_string_constant:
  8581. return xpath_string::from_const(_data.string);
  // local-name(), name() and namespace-uri(): the _0 variants use the context node,
  // the _1 variants use the first node of the evaluated argument (nodeset_eval_first).
  8582. case ast_func_local_name_0:
  8583. {
  8584. xpath_node na = c.n;
  8585. return xpath_string::from_const(local_name(na));
  8586. }
  8587. case ast_func_local_name_1:
  8588. {
  8589. xpath_allocator_capture cr(stack.result);
  8590. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8591. xpath_node na = ns.first();
  8592. return xpath_string::from_const(local_name(na));
  8593. }
  8594. case ast_func_name_0:
  8595. {
  8596. xpath_node na = c.n;
  8597. return xpath_string::from_const(qualified_name(na));
  8598. }
  8599. case ast_func_name_1:
  8600. {
  8601. xpath_allocator_capture cr(stack.result);
  8602. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8603. xpath_node na = ns.first();
  8604. return xpath_string::from_const(qualified_name(na));
  8605. }
  8606. case ast_func_namespace_uri_0:
  8607. {
  8608. xpath_node na = c.n;
  8609. return xpath_string::from_const(namespace_uri(na));
  8610. }
  8611. case ast_func_namespace_uri_1:
  8612. {
  8613. xpath_allocator_capture cr(stack.result);
  8614. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8615. xpath_node na = ns.first();
  8616. return xpath_string::from_const(namespace_uri(na));
  8617. }
  // string() without arguments yields the string value of the context node.
  8618. case ast_func_string_0:
  8619. return string_value(c.n, stack.result);
  8620. case ast_func_string_1:
  8621. return _left->eval_string(c, stack);
  8622. case ast_func_concat:
  8623. return eval_string_concat(c, stack);
  8624. case ast_func_substring_before:
  8625. {
  8626. xpath_allocator_capture cr(stack.temp);
  8627. xpath_stack swapped_stack = {stack.temp, stack.result};
  8628. xpath_string s = _left->eval_string(c, swapped_stack);
  8629. xpath_string p = _right->eval_string(c, swapped_stack);
  8630. const char_t* pos = find_substring(s.c_str(), p.c_str());
  // when a match was found, the part of s in front of it is copied using stack.result; otherwise the result is empty
  8631. return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
  8632. }
  8633. case ast_func_substring_after:
  8634. {
  8635. xpath_allocator_capture cr(stack.temp);
  8636. xpath_stack swapped_stack = {stack.temp, stack.result};
  8637. xpath_string s = _left->eval_string(c, swapped_stack);
  8638. xpath_string p = _right->eval_string(c, swapped_stack);
  8639. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8640. if (!pos) return xpath_string();
  // the result starts right behind the matched pattern and runs to the end of s
  8641. const char_t* rbegin = pos + p.length();
  8642. const char_t* rend = s.c_str() + s.length();
  // heap-backed source strings are copied using stack.result; others are referenced in place via from_const
  8643. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8644. }
  // substring(s, first): first is a rounded 1-based start position.
  8645. case ast_func_substring_2:
  8646. {
  8647. xpath_allocator_capture cr(stack.temp);
  8648. xpath_stack swapped_stack = {stack.temp, stack.result};
  8649. xpath_string s = _left->eval_string(c, swapped_stack);
  8650. size_t s_length = s.length();
  8651. double first = round_nearest(_right->eval_number(c, stack));
  8652. if (is_nan(first)) return xpath_string(); // NaN
  // a start position past the end of the string selects nothing
  8653. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  // start positions below 1 are clamped to the first character
  8654. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8655. assert(1 <= pos && pos <= s_length + 1);
  8656. const char_t* rbegin = s.c_str() + (pos - 1);
  8657. const char_t* rend = s.c_str() + s.length();
  8658. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8659. }
  // substring(s, first, length): `last` is the 1-based position one past the final selected character.
  8660. case ast_func_substring_3:
  8661. {
  8662. xpath_allocator_capture cr(stack.temp);
  8663. xpath_stack swapped_stack = {stack.temp, stack.result};
  8664. xpath_string s = _left->eval_string(c, swapped_stack);
  8665. size_t s_length = s.length();
  8666. double first = round_nearest(_right->eval_number(c, stack));
  8667. double last = first + round_nearest(_right->_next->eval_number(c, stack));
  // NaN bounds, a start past the end, an empty/inverted range, or a range ending before the first character all give ""
  8668. if (is_nan(first) || is_nan(last)) return xpath_string();
  8669. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8670. else if (first >= last) return xpath_string();
  8671. else if (last < 1) return xpath_string();
  // clamp both bounds into [1, s_length + 1] before converting to size_t
  8672. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8673. size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
  8674. assert(1 <= pos && pos <= end && end <= s_length + 1);
  8675. const char_t* rbegin = s.c_str() + (pos - 1);
  8676. const char_t* rend = s.c_str() + (end - 1);
  // from_const is only used when the selection reaches the end of a non-heap string; every other case is copied
  8677. return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
  8678. }
  // normalize-space(): the string is rewritten in its own buffer.
  // NOTE(review): s.data(stack.result) presumably returns a writable buffer and yields null on allocation failure - confirm.
  8679. case ast_func_normalize_space_0:
  8680. {
  8681. xpath_string s = string_value(c.n, stack.result);
  8682. char_t* begin = s.data(stack.result);
  8683. if (!begin) return xpath_string();
  8684. char_t* end = normalize_space(begin);
  8685. return xpath_string::from_heap_preallocated(begin, end);
  8686. }
  8687. case ast_func_normalize_space_1:
  8688. {
  8689. xpath_string s = _left->eval_string(c, stack);
  8690. char_t* begin = s.data(stack.result);
  8691. if (!begin) return xpath_string();
  8692. char_t* end = normalize_space(begin);
  8693. return xpath_string::from_heap_preallocated(begin, end);
  8694. }
  // translate(s, from, to): s is evaluated on the regular stack because it becomes the result;
  // the two mapping strings are evaluated on the swapped stack.
  8695. case ast_func_translate:
  8696. {
  8697. xpath_allocator_capture cr(stack.temp);
  8698. xpath_stack swapped_stack = {stack.temp, stack.result};
  8699. xpath_string s = _left->eval_string(c, stack);
  8700. xpath_string from = _right->eval_string(c, swapped_stack);
  8701. xpath_string to = _right->_next->eval_string(c, swapped_stack);
  8702. char_t* begin = s.data(stack.result);
  8703. if (!begin) return xpath_string();
  8704. char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
  8705. return xpath_string::from_heap_preallocated(begin, end);
  8706. }
  // translate() variant that uses the lookup table stored in _data.table
  8707. case ast_opt_translate_table:
  8708. {
  8709. xpath_string s = _left->eval_string(c, stack);
  8710. char_t* begin = s.data(stack.result);
  8711. if (!begin) return xpath_string();
  8712. char_t* end = translate_table(begin, _data.table);
  8713. return xpath_string::from_heap_preallocated(begin, end);
  8714. }
  8715. case ast_variable:
  8716. {
  8717. assert(_rettype == _data.variable->type());
  8718. if (_rettype == xpath_type_string)
  8719. return xpath_string::from_const(_data.variable->get_string());
  8720. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8721. break;
  8722. }
  8723. default:
  8724. ;
  8725. }
  8726. // none of the ast types that return the value directly matched, we need to perform type conversion
  8727. switch (_rettype)
  8728. {
  8729. case xpath_type_boolean:
  8730. return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
  8731. case xpath_type_number:
  8732. return convert_number_to_string(eval_number(c, stack), stack.result);
  // node set -> string value of its first node, or an empty string for an empty set
  8733. case xpath_type_node_set:
  8734. {
  8735. xpath_allocator_capture cr(stack.temp);
  8736. xpath_stack swapped_stack = {stack.temp, stack.result};
  8737. xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
  8738. return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
  8739. }
  8740. default:
  8741. assert(false && "Wrong expression for return type string"); // unreachable
  8742. return xpath_string();
  8743. }
  8744. }
  // Evaluates this AST node in context c and returns the resulting node set.
  //
  // c     - evaluation context
  // stack - allocator pair; the returned set is built using stack.result
  // eval  - how much of the set the caller needs (the values visible in this file are
  //         nodeset_eval_all, nodeset_eval_any and nodeset_eval_first)
  //
  // Only unions, filters, id(), location steps, the root step and node-set variables
  // can produce a node set. Any other node type reaches the assertion at the end,
  // because no conversion from other value types to a node set exists.
  8745. xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
  8746. {
  8747. switch (_type)
  8748. {
  8749. case ast_op_union:
  8750. {
  8751. xpath_allocator_capture cr(stack.temp);
  8752. xpath_stack swapped_stack = {stack.temp, stack.result};
  // the left operand is evaluated on the regular stack because it becomes the returned set;
  // the right operand is evaluated on the swapped stack and then appended using stack.result
  8753. xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
  8754. xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
  8755. // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
  8756. ls.set_type(xpath_node_set::type_unsorted);
  8757. ls.append(rs.begin(), rs.end(), stack.result);
  8758. ls.remove_duplicates(stack.temp);
  8759. return ls;
  8760. }
  8761. case ast_filter:
  8762. {
  // a predicate classified as predicate_constant_one only needs the first node of the input set
  8763. xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
  8764. // either expression is a number or it contains position() call; sort by document order
  8765. if (_test != predicate_posinv) set.sort_do();
  8766. bool once = eval_once(set.type(), eval);
  8767. apply_predicate(set, 0, stack, once);
  8768. return set;
  8769. }
  // id() always evaluates to an empty node set here
  8770. case ast_func_id:
  8771. return xpath_node_set_raw();
  8772. case ast_step:
  8773. {
  // dispatch on the axis; each call passes a distinct axis_to_type<> tag object to step_do
  8774. switch (_axis)
  8775. {
  8776. case axis_ancestor:
  8777. return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
  8778. case axis_ancestor_or_self:
  8779. return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
  8780. case axis_attribute:
  8781. return step_do(c, stack, eval, axis_to_type<axis_attribute>());
  8782. case axis_child:
  8783. return step_do(c, stack, eval, axis_to_type<axis_child>());
  8784. case axis_descendant:
  8785. return step_do(c, stack, eval, axis_to_type<axis_descendant>());
  8786. case axis_descendant_or_self:
  8787. return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
  8788. case axis_following:
  8789. return step_do(c, stack, eval, axis_to_type<axis_following>());
  8790. case axis_following_sibling:
  8791. return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
  8792. case axis_namespace:
  8793. // namespaced axis is not supported
  8794. return xpath_node_set_raw();
  8795. case axis_parent:
  8796. return step_do(c, stack, eval, axis_to_type<axis_parent>());
  8797. case axis_preceding:
  8798. return step_do(c, stack, eval, axis_to_type<axis_preceding>());
  8799. case axis_preceding_sibling:
  8800. return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
  8801. case axis_self:
  8802. return step_do(c, stack, eval, axis_to_type<axis_self>());
  8803. default:
  8804. assert(false && "Unknown axis"); // unreachable
  8805. return xpath_node_set_raw();
  8806. }
  8807. }
  8808. case ast_step_root:
  8809. {
  8810. assert(!_right); // root step can't have any predicates
  8811. xpath_node_set_raw ns;
  8812. ns.set_type(xpath_node_set::type_sorted);
  // the root is taken from the context node itself, or from the parent when the context is an attribute;
  // if the context holds neither, the set stays empty
  8813. if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
  8814. else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
  8815. return ns;
  8816. }
  8817. case ast_variable:
  8818. {
  8819. assert(_rettype == _data.variable->type());
  8820. if (_rettype == xpath_type_node_set)
  8821. {
  // copy the variable's nodes into a fresh raw set using stack.result, keeping its sort type
  8822. const xpath_node_set& s = _data.variable->get_node_set();
  8823. xpath_node_set_raw ns;
  8824. ns.set_type(s.type());
  8825. ns.append(s.begin(), s.end(), stack.result);
  8826. return ns;
  8827. }
  8828. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8829. break;
  8830. }
  8831. default:
  8832. ;
  8833. }
  8834. // none of the ast types that return the value directly matched, but conversions to node set are invalid
  8835. assert(false && "Wrong expression for return type node set"); // unreachable
  8836. return xpath_node_set_raw();
  8837. }
  8838. void optimize(xpath_allocator* alloc)
  8839. {
  8840. if (_left)
  8841. _left->optimize(alloc);
  8842. if (_right)
  8843. _right->optimize(alloc);
  8844. if (_next)
  8845. _next->optimize(alloc);
  8846. // coverity[var_deref_model]
  8847. optimize_self(alloc);
  8848. }
  // Applies local rewrites to this single node; child nodes are not visited here.
  // The steps run in a fixed order and each one re-tests the node's current state,
  // because earlier steps may have replaced _right, _left, _axis or _type.
  //
  // alloc - allocator used to build the translate() lookup table
  8849. void optimize_self(xpath_allocator* alloc)
  8850. {
  8851. // Rewrite [position()=expr] with [expr]
  8852. // Note that this step has to go before classification to recognize [position()=1]
  8853. if ((_type == ast_filter || _type == ast_predicate) &&
  8854. _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8855. _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
  8856. {
  8857. _right = _right->_right;
  8858. }
  8859. // Classify filter/predicate ops to perform various optimizations during evaluation
  8860. if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8861. {
  8862. assert(_test == predicate_default);
  // literal 1 -> predicate_constant_one
  // number constant, number variable or last() -> predicate_constant
  // non-numeric expression for which is_posinv_expr() holds -> predicate_posinv
  // anything else keeps predicate_default
  8863. if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
  8864. _test = predicate_constant_one;
  8865. else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
  8866. _test = predicate_constant;
  8867. else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
  8868. _test = predicate_posinv;
  8869. }
  8870. // Rewrite descendant-or-self::node()/child::foo with descendant::foo
  8871. // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
  8872. // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
  8873. // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
  8874. if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
  8875. _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
  8876. is_posinv_step())
  8877. {
  // child/descendant collapse to descendant; self/descendant-or-self collapse to descendant-or-self
  8878. if (_axis == axis_child || _axis == axis_descendant)
  8879. _axis = axis_descendant;
  8880. else
  8881. _axis = axis_descendant_or_self;
  // drop the intermediate descendant-or-self::node() step from the chain
  8882. _left = _left->_left;
  8883. }
  8884. // Use optimized lookup table implementation for translate() with constant arguments
  8885. if (_type == ast_func_translate &&
  8886. _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
  8887. _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
  8888. {
  8889. unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
  // a null table leaves the node as a regular translate() call
  8890. if (table)
  8891. {
  8892. _type = ast_opt_translate_table;
  8893. _data.table = table;
  8894. }
  8895. }
  8896. // Use optimized path for @attr = 'value' or @attr = $value
  8897. if (_type == ast_op_equal &&
  8898. _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
  8899. // coverity[mixed_enums]
  8900. _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
  8901. (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
  8902. {
  8903. _type = ast_opt_compare_attribute;
  8904. }
  8905. }
  8906. bool is_posinv_expr() const
  8907. {
  8908. switch (_type)
  8909. {
  8910. case ast_func_position:
  8911. case ast_func_last:
  8912. return false;
  8913. case ast_string_constant:
  8914. case ast_number_constant:
  8915. case ast_variable:
  8916. return true;
  8917. case ast_step:
  8918. case ast_step_root:
  8919. return true;
  8920. case ast_predicate:
  8921. case ast_filter:
  8922. return true;
  8923. default:
  8924. if (_left && !_left->is_posinv_expr()) return false;
  8925. for (xpath_ast_node* n = _right; n; n = n->_next)
  8926. if (!n->is_posinv_expr()) return false;
  8927. return true;
  8928. }
  8929. }
  8930. bool is_posinv_step() const
  8931. {
  8932. assert(_type == ast_step);
  8933. for (xpath_ast_node* n = _right; n; n = n->_next)
  8934. {
  8935. assert(n->_type == ast_predicate);
  8936. if (n->_test != predicate_posinv)
  8937. return false;
  8938. }
  8939. return true;
  8940. }
  8941. xpath_value_type rettype() const
  8942. {
  8943. return static_cast<xpath_value_type>(_rettype);
  8944. }
  8945. };
  // Depth limit constant for XPath queries. Defining PUGIXML_XPATH_DEPTH_LIMIT
  // overrides the built-in value of 1024.
  #ifdef PUGIXML_XPATH_DEPTH_LIMIT
  static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
  #else
  static const size_t xpath_ast_depth_limit = 1024;
  #endif
  8953. struct xpath_parser
  8954. {
  // allocator used for AST nodes and copied strings; its _error flag is set by error_oom()
  8955. xpath_allocator* _alloc;
  // lexer over the query text; its current position is used to compute error offsets
  8956. xpath_lexer _lexer;
  // start of the query text; subtracted from the lexer position in error()
  8957. const char_t* _query;
  // variable set used to resolve variable references; a null set makes every reference a parse error
  8958. xpath_variable_set* _variables;
  // receives the error message and offset written by error()
  8959. xpath_parse_result* _result;
  // scratch buffer handed to get_variable_scratch() during variable lookup
  8960. char_t _scratch[32];
  // NOTE(review): presumably the current nesting depth checked against xpath_ast_depth_limit - not used in the visible code, confirm
  8961. size_t _depth;
  8962. xpath_ast_node* error(const char* message)
  8963. {
  8964. _result->error = message;
  8965. _result->offset = _lexer.current_pos() - _query;
  8966. return 0;
  8967. }
  8968. xpath_ast_node* error_oom()
  8969. {
  8970. assert(_alloc->_error);
  8971. *_alloc->_error = true;
  8972. return 0;
  8973. }
  8974. xpath_ast_node* error_rec()
  8975. {
  8976. return error("Exceeded maximum allowed query depth");
  8977. }
  8978. void* alloc_node()
  8979. {
  8980. return _alloc->allocate(sizeof(xpath_ast_node));
  8981. }
  8982. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
  8983. {
  8984. void* memory = alloc_node();
  8985. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8986. }
  8987. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
  8988. {
  8989. void* memory = alloc_node();
  8990. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8991. }
  8992. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
  8993. {
  8994. void* memory = alloc_node();
  8995. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8996. }
  8997. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
  8998. {
  8999. void* memory = alloc_node();
  9000. return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
  9001. }
  9002. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
  9003. {
  9004. void* memory = alloc_node();
  9005. return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
  9006. }
  9007. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
  9008. {
  9009. void* memory = alloc_node();
  9010. return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
  9011. }
  9012. const char_t* alloc_string(const xpath_lexer_string& value)
  9013. {
  9014. if (!value.begin)
  9015. return PUGIXML_TEXT("");
  9016. size_t length = static_cast<size_t>(value.end - value.begin);
  9017. char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
  9018. if (!c) return 0;
  9019. memcpy(c, value.begin, length * sizeof(char_t));
  9020. c[length] = 0;
  9021. return c;
  9022. }
  // Builds the AST node for a call to a named function.
  //
  // name - function name as lexed; its first character selects the group of candidates
  // argc - number of arguments supplied in the query
  // args - the first two argument nodes
  //        NOTE(review): arguments beyond the second are presumably linked through
  //        args[1]->_next by the caller (the evaluators read _right->_next) - confirm.
  //
  // Returns the new node, or null when the allocation fails or an error was reported:
  // "Function has to be applied to node set" when an argument that must be a node set
  // has another type, and "Unrecognized function or wrong parameter count" when no
  // name/arity combination matches.
  9023. xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
  9024. {
  9025. switch (name.begin[0])
  9026. {
  9027. case 'b':
  9028. if (name == PUGIXML_TEXT("boolean") && argc == 1)
  9029. return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
  9030. break;
  9031. case 'c':
  9032. if (name == PUGIXML_TEXT("count") && argc == 1)
  9033. {
  9034. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9035. return alloc_node(ast_func_count, xpath_type_number, args[0]);
  9036. }
  9037. else if (name == PUGIXML_TEXT("contains") && argc == 2)
  9038. return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
  // concat() accepts two or more arguments
  9039. else if (name == PUGIXML_TEXT("concat") && argc >= 2)
  9040. return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
  9041. else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
  9042. return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
  9043. break;
  9044. case 'f':
  9045. if (name == PUGIXML_TEXT("false") && argc == 0)
  9046. return alloc_node(ast_func_false, xpath_type_boolean);
  9047. else if (name == PUGIXML_TEXT("floor") && argc == 1)
  9048. return alloc_node(ast_func_floor, xpath_type_number, args[0]);
  9049. break;
  9050. case 'i':
  9051. if (name == PUGIXML_TEXT("id") && argc == 1)
  9052. return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
  9053. break;
  9054. case 'l':
  9055. if (name == PUGIXML_TEXT("last") && argc == 0)
  9056. return alloc_node(ast_func_last, xpath_type_number);
  9057. else if (name == PUGIXML_TEXT("lang") && argc == 1)
  9058. return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
  // functions with an optional argument map to a _0 (no argument) or _1 (one argument) node type
  9059. else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
  9060. {
  9061. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9062. return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
  9063. }
  9064. break;
  9065. case 'n':
  9066. if (name == PUGIXML_TEXT("name") && argc <= 1)
  9067. {
  9068. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9069. return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
  9070. }
  9071. else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
  9072. {
  9073. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9074. return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
  9075. }
  // NOTE(review): args[1] is forwarded although argc <= 1 here; this relies on the caller having initialised both slots - confirm
  9076. else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
  9077. return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
  9078. else if (name == PUGIXML_TEXT("not") && argc == 1)
  9079. return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
  9080. else if (name == PUGIXML_TEXT("number") && argc <= 1)
  9081. return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
  9082. break;
  9083. case 'p':
  9084. if (name == PUGIXML_TEXT("position") && argc == 0)
  9085. return alloc_node(ast_func_position, xpath_type_number);
  9086. break;
  9087. case 'r':
  9088. if (name == PUGIXML_TEXT("round") && argc == 1)
  9089. return alloc_node(ast_func_round, xpath_type_number, args[0]);
  9090. break;
  9091. case 's':
  9092. if (name == PUGIXML_TEXT("string") && argc <= 1)
  9093. return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
  9094. else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
  9095. return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
  9096. else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
  9097. return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
  9098. else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
  9099. return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
  9100. else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
  9101. return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
  // substring() has a two-argument and a three-argument form with separate node types
  9102. else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
  9103. return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
  9104. else if (name == PUGIXML_TEXT("sum") && argc == 1)
  9105. {
  9106. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9107. return alloc_node(ast_func_sum, xpath_type_number, args[0]);
  9108. }
  9109. break;
  9110. case 't':
  9111. if (name == PUGIXML_TEXT("translate") && argc == 3)
  9112. return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
  9113. else if (name == PUGIXML_TEXT("true") && argc == 0)
  9114. return alloc_node(ast_func_true, xpath_type_boolean);
  9115. break;
  9116. default:
  9117. break;
  9118. }
  // no name/arity combination matched
  9119. return error("Unrecognized function or wrong parameter count");
  9120. }
  9121. axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
  9122. {
  9123. specified = true;
  9124. switch (name.begin[0])
  9125. {
  9126. case 'a':
  9127. if (name == PUGIXML_TEXT("ancestor"))
  9128. return axis_ancestor;
  9129. else if (name == PUGIXML_TEXT("ancestor-or-self"))
  9130. return axis_ancestor_or_self;
  9131. else if (name == PUGIXML_TEXT("attribute"))
  9132. return axis_attribute;
  9133. break;
  9134. case 'c':
  9135. if (name == PUGIXML_TEXT("child"))
  9136. return axis_child;
  9137. break;
  9138. case 'd':
  9139. if (name == PUGIXML_TEXT("descendant"))
  9140. return axis_descendant;
  9141. else if (name == PUGIXML_TEXT("descendant-or-self"))
  9142. return axis_descendant_or_self;
  9143. break;
  9144. case 'f':
  9145. if (name == PUGIXML_TEXT("following"))
  9146. return axis_following;
  9147. else if (name == PUGIXML_TEXT("following-sibling"))
  9148. return axis_following_sibling;
  9149. break;
  9150. case 'n':
  9151. if (name == PUGIXML_TEXT("namespace"))
  9152. return axis_namespace;
  9153. break;
  9154. case 'p':
  9155. if (name == PUGIXML_TEXT("parent"))
  9156. return axis_parent;
  9157. else if (name == PUGIXML_TEXT("preceding"))
  9158. return axis_preceding;
  9159. else if (name == PUGIXML_TEXT("preceding-sibling"))
  9160. return axis_preceding_sibling;
  9161. break;
  9162. case 's':
  9163. if (name == PUGIXML_TEXT("self"))
  9164. return axis_self;
  9165. break;
  9166. default:
  9167. break;
  9168. }
  9169. specified = false;
  9170. return axis_child;
  9171. }
  9172. nodetest_t parse_node_test_type(const xpath_lexer_string& name)
  9173. {
  9174. switch (name.begin[0])
  9175. {
  9176. case 'c':
  9177. if (name == PUGIXML_TEXT("comment"))
  9178. return nodetest_type_comment;
  9179. break;
  9180. case 'n':
  9181. if (name == PUGIXML_TEXT("node"))
  9182. return nodetest_type_node;
  9183. break;
  9184. case 'p':
  9185. if (name == PUGIXML_TEXT("processing-instruction"))
  9186. return nodetest_type_pi;
  9187. break;
  9188. case 't':
  9189. if (name == PUGIXML_TEXT("text"))
  9190. return nodetest_type_text;
  9191. break;
  9192. default:
  9193. break;
  9194. }
  9195. return nodetest_none;
  9196. }
  9197. // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
  9198. xpath_ast_node* parse_primary_expression()
  9199. {
  9200. switch (_lexer.current())
  9201. {
  9202. case lex_var_ref:
  9203. {
  9204. xpath_lexer_string name = _lexer.contents();
  9205. if (!_variables)
  9206. return error("Unknown variable: variable set is not provided");
  9207. xpath_variable* var = 0;
  9208. if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
  9209. return error_oom();
  9210. if (!var)
  9211. return error("Unknown variable: variable set does not contain the given name");
  9212. _lexer.next();
  9213. return alloc_node(ast_variable, var->type(), var);
  9214. }
  9215. case lex_open_brace:
  9216. {
  9217. _lexer.next();
  9218. xpath_ast_node* n = parse_expression();
  9219. if (!n) return 0;
  9220. if (_lexer.current() != lex_close_brace)
  9221. return error("Expected ')' to match an opening '('");
  9222. _lexer.next();
  9223. return n;
  9224. }
  9225. case lex_quoted_string:
  9226. {
  9227. const char_t* value = alloc_string(_lexer.contents());
  9228. if (!value) return 0;
  9229. _lexer.next();
  9230. return alloc_node(ast_string_constant, xpath_type_string, value);
  9231. }
  9232. case lex_number:
  9233. {
  9234. double value = 0;
  9235. if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
  9236. return error_oom();
  9237. _lexer.next();
  9238. return alloc_node(ast_number_constant, xpath_type_number, value);
  9239. }
  9240. case lex_string:
  9241. {
  9242. xpath_ast_node* args[2] = {0};
  9243. size_t argc = 0;
  9244. xpath_lexer_string function = _lexer.contents();
  9245. _lexer.next();
  9246. xpath_ast_node* last_arg = 0;
  9247. if (_lexer.current() != lex_open_brace)
  9248. return error("Unrecognized function call");
  9249. _lexer.next();
  9250. size_t old_depth = _depth;
  9251. while (_lexer.current() != lex_close_brace)
  9252. {
  9253. if (argc > 0)
  9254. {
  9255. if (_lexer.current() != lex_comma)
  9256. return error("No comma between function arguments");
  9257. _lexer.next();
  9258. }
  9259. if (++_depth > xpath_ast_depth_limit)
  9260. return error_rec();
  9261. xpath_ast_node* n = parse_expression();
  9262. if (!n) return 0;
  9263. if (argc < 2) args[argc] = n;
  9264. else last_arg->set_next(n);
  9265. argc++;
  9266. last_arg = n;
  9267. }
  9268. _lexer.next();
  9269. _depth = old_depth;
  9270. return parse_function(function, argc, args);
  9271. }
  9272. default:
  9273. return error("Unrecognizable primary expression");
  9274. }
  9275. }
  9276. // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
  9277. // Predicate ::= '[' PredicateExpr ']'
  9278. // PredicateExpr ::= Expr
  9279. xpath_ast_node* parse_filter_expression()
  9280. {
  9281. xpath_ast_node* n = parse_primary_expression();
  9282. if (!n) return 0;
  9283. size_t old_depth = _depth;
  9284. while (_lexer.current() == lex_open_square_brace)
  9285. {
  9286. _lexer.next();
  9287. if (++_depth > xpath_ast_depth_limit)
  9288. return error_rec();
  9289. if (n->rettype() != xpath_type_node_set)
  9290. return error("Predicate has to be applied to node set");
  9291. xpath_ast_node* expr = parse_expression();
  9292. if (!expr) return 0;
  9293. n = alloc_node(ast_filter, n, expr, predicate_default);
  9294. if (!n) return 0;
  9295. if (_lexer.current() != lex_close_square_brace)
  9296. return error("Expected ']' to match an opening '['");
  9297. _lexer.next();
  9298. }
  9299. _depth = old_depth;
  9300. return n;
  9301. }
  9302. // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
  9303. // AxisSpecifier ::= AxisName '::' | '@'?
  9304. // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
  9305. // NameTest ::= '*' | NCName ':' '*' | QName
  9306. // AbbreviatedStep ::= '.' | '..'
  // Parses a single location step (axis specifier, node test and trailing predicates, or an
  // abbreviated '.' / '..' step) and returns an ast_step node whose left operand is `set`.
  // `set` is the node the step is applied to; it may be 0 (relative paths are started with 0).
  // Every failure path returns either the result of error()/error_rec() or 0.
  9307. xpath_ast_node* parse_step(xpath_ast_node* set)
  9308. {
  // a step can only continue from something that evaluates to a node set
  9309. if (set && set->rettype() != xpath_type_node_set)
  9310. return error("Step has to be applied to node set");
  9311. bool axis_specified = false;
  9312. axis_t axis = axis_child; // implied child axis
  // lex_axis_attribute selects the attribute axis without an explicit axis name
  9313. if (_lexer.current() == lex_axis_attribute)
  9314. {
  9315. axis = axis_attribute;
  9316. axis_specified = true;
  9317. _lexer.next();
  9318. }
  // lex_dot: abbreviated step, built as the self axis with a node() type test
  9319. else if (_lexer.current() == lex_dot)
  9320. {
  9321. _lexer.next();
  9322. if (_lexer.current() == lex_open_square_brace)
  9323. return error("Predicates are not allowed after an abbreviated step");
  9324. return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
  9325. }
  // lex_double_dot: abbreviated step, built as the parent axis with a node() type test
  9326. else if (_lexer.current() == lex_double_dot)
  9327. {
  9328. _lexer.next();
  9329. if (_lexer.current() == lex_open_square_brace)
  9330. return error("Predicates are not allowed after an abbreviated step");
  9331. return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
  9332. }
  // nt_type stays nodetest_none until the node test has been classified
  9333. nodetest_t nt_type = nodetest_none;
  9334. xpath_lexer_string nt_name;
  9335. if (_lexer.current() == lex_string)
  9336. {
  9337. // node name test
  9338. nt_name = _lexer.contents();
  9339. _lexer.next();
  9340. // was it an axis name?
  9341. if (_lexer.current() == lex_double_colon)
  9342. {
  9343. // parse axis name
  // an explicit axis name cannot follow an axis that was already specified above
  9344. if (axis_specified)
  9345. return error("Two axis specifiers in one step");
  // parse_axis_name clears axis_specified when the name is not a known axis
  9346. axis = parse_axis_name(nt_name, axis_specified);
  9347. if (!axis_specified)
  9348. return error("Unknown axis");
  9349. // read actual node test
  9350. _lexer.next();
  9351. if (_lexer.current() == lex_multiply)
  9352. {
  9353. nt_type = nodetest_all;
  9354. nt_name = xpath_lexer_string();
  9355. _lexer.next();
  9356. }
  9357. else if (_lexer.current() == lex_string)
  9358. {
  9359. nt_name = _lexer.contents();
  9360. _lexer.next();
  9361. }
  9362. else
  9363. {
  9364. return error("Unrecognized node test");
  9365. }
  9366. }
  // still unclassified: nt_name holds a name that is either a node type, processing-instruction or a name test
  9367. if (nt_type == nodetest_none)
  9368. {
  9369. // node type test or processing-instruction
  9370. if (_lexer.current() == lex_open_brace)
  9371. {
  9372. _lexer.next();
  // empty parentheses: nt_name must be one of the names known to parse_node_test_type
  9373. if (_lexer.current() == lex_close_brace)
  9374. {
  9375. _lexer.next();
  9376. nt_type = parse_node_test_type(nt_name);
  9377. if (nt_type == nodetest_none)
  9378. return error("Unrecognized node type");
  9379. nt_name = xpath_lexer_string();
  9380. }
  // processing-instruction with an argument: the quoted literal becomes the name to match
  9381. else if (nt_name == PUGIXML_TEXT("processing-instruction"))
  9382. {
  9383. if (_lexer.current() != lex_quoted_string)
  9384. return error("Only literals are allowed as arguments to processing-instruction()");
  9385. nt_type = nodetest_pi;
  9386. nt_name = _lexer.contents();
  9387. _lexer.next();
  9388. if (_lexer.current() != lex_close_brace)
  9389. return error("Unmatched brace near processing-instruction()");
  9390. _lexer.next();
  9391. }
  9392. else
  9393. {
  9394. return error("Unmatched brace near node type test");
  9395. }
  9396. }
  9397. // QName or NCName:*
  9398. else
  9399. {
  // a name longer than two characters that ends in ":*" selects a whole namespace prefix;
  // only the '*' is dropped, so the stored name still ends with ':'
  9400. if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
  9401. {
  9402. nt_name.end--; // erase *
  9403. nt_type = nodetest_all_in_namespace;
  9404. }
  9405. else
  9406. {
  9407. nt_type = nodetest_name;
  9408. }
  9409. }
  9410. }
  9411. }
  // a bare lex_multiply with no preceding name matches every node on the chosen axis
  9412. else if (_lexer.current() == lex_multiply)
  9413. {
  9414. nt_type = nodetest_all;
  9415. _lexer.next();
  9416. }
  9417. else
  9418. {
  9419. return error("Unrecognized node test");
  9420. }
  // NOTE(review): alloc_string is also reached with a default-constructed nt_name (e.g. for '*');
  // presumably it returns a non-null value in that case - confirm against alloc_string
  9421. const char_t* nt_name_copy = alloc_string(nt_name);
  9422. if (!nt_name_copy) return 0;
  9423. xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
  9424. if (!n) return 0;
  // _depth is incremented once per predicate and restored after the loop; error returns leave it as is
  9425. size_t old_depth = _depth;
  9426. xpath_ast_node* last = 0;
  9427. while (_lexer.current() == lex_open_square_brace)
  9428. {
  9429. _lexer.next();
  9430. if (++_depth > xpath_ast_depth_limit)
  9431. return error_rec();
  9432. xpath_ast_node* expr = parse_expression();
  9433. if (!expr) return 0;
  9434. xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
  9435. if (!pred) return 0;
  9436. if (_lexer.current() != lex_close_square_brace)
  9437. return error("Expected ']' to match an opening '['");
  9438. _lexer.next();
  // the first predicate is attached with set_right on the step; later ones are chained with set_next
  9439. if (last) last->set_next(pred);
  9440. else n->set_right(pred);
  9441. last = pred;
  9442. }
  9443. _depth = old_depth;
  9444. return n;
  9445. }
  9446. // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
  9447. xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
  9448. {
  9449. xpath_ast_node* n = parse_step(set);
  9450. if (!n) return 0;
  9451. size_t old_depth = _depth;
  9452. while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9453. {
  9454. lexeme_t l = _lexer.current();
  9455. _lexer.next();
  9456. if (l == lex_double_slash)
  9457. {
  9458. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9459. if (!n) return 0;
  9460. ++_depth;
  9461. }
  9462. if (++_depth > xpath_ast_depth_limit)
  9463. return error_rec();
  9464. n = parse_step(n);
  9465. if (!n) return 0;
  9466. }
  9467. _depth = old_depth;
  9468. return n;
  9469. }
  9470. // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
  9471. // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
  9472. xpath_ast_node* parse_location_path()
  9473. {
  9474. if (_lexer.current() == lex_slash)
  9475. {
  9476. _lexer.next();
  9477. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9478. if (!n) return 0;
  9479. // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
  9480. lexeme_t l = _lexer.current();
  9481. if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
  9482. return parse_relative_location_path(n);
  9483. else
  9484. return n;
  9485. }
  9486. else if (_lexer.current() == lex_double_slash)
  9487. {
  9488. _lexer.next();
  9489. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9490. if (!n) return 0;
  9491. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9492. if (!n) return 0;
  9493. return parse_relative_location_path(n);
  9494. }
  9495. // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
  9496. return parse_relative_location_path(0);
  9497. }
  9498. // PathExpr ::= LocationPath
  9499. // | FilterExpr
  9500. // | FilterExpr '/' RelativeLocationPath
  9501. // | FilterExpr '//' RelativeLocationPath
  9502. // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
  9503. // UnaryExpr ::= UnionExpr | '-' UnaryExpr
  9504. xpath_ast_node* parse_path_or_unary_expression()
  9505. {
  9506. // Clarification.
  9507. // PathExpr begins with either LocationPath or FilterExpr.
  9508. // FilterExpr begins with PrimaryExpr
  9509. // PrimaryExpr begins with '$' in case of it being a variable reference,
  9510. // '(' in case of it being an expression, string literal, number constant or
  9511. // function call.
  9512. if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
  9513. _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
  9514. _lexer.current() == lex_string)
  9515. {
  9516. if (_lexer.current() == lex_string)
  9517. {
  9518. // This is either a function call, or not - if not, we shall proceed with location path
  9519. const char_t* state = _lexer.state();
  9520. while (PUGI_IMPL_IS_CHARTYPE(*state, ct_space)) ++state;
  9521. if (*state != '(')
  9522. return parse_location_path();
  9523. // This looks like a function call; however this still can be a node-test. Check it.
  9524. if (parse_node_test_type(_lexer.contents()) != nodetest_none)
  9525. return parse_location_path();
  9526. }
  9527. xpath_ast_node* n = parse_filter_expression();
  9528. if (!n) return 0;
  9529. if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9530. {
  9531. lexeme_t l = _lexer.current();
  9532. _lexer.next();
  9533. if (l == lex_double_slash)
  9534. {
  9535. if (n->rettype() != xpath_type_node_set)
  9536. return error("Step has to be applied to node set");
  9537. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9538. if (!n) return 0;
  9539. }
  9540. // select from location path
  9541. return parse_relative_location_path(n);
  9542. }
  9543. return n;
  9544. }
  9545. else if (_lexer.current() == lex_minus)
  9546. {
  9547. _lexer.next();
  9548. // precedence 7+ - only parses union expressions
  9549. xpath_ast_node* n = parse_expression(7);
  9550. if (!n) return 0;
  9551. return alloc_node(ast_op_negate, xpath_type_number, n);
  9552. }
  9553. else
  9554. {
  9555. return parse_location_path();
  9556. }
  9557. }
  9558. struct binary_op_t
  9559. {
  9560. ast_type_t asttype;
  9561. xpath_value_type rettype;
  9562. int precedence;
  9563. binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
  9564. {
  9565. }
  9566. binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
  9567. {
  9568. }
  9569. static binary_op_t parse(xpath_lexer& lexer)
  9570. {
  9571. switch (lexer.current())
  9572. {
  9573. case lex_string:
  9574. if (lexer.contents() == PUGIXML_TEXT("or"))
  9575. return binary_op_t(ast_op_or, xpath_type_boolean, 1);
  9576. else if (lexer.contents() == PUGIXML_TEXT("and"))
  9577. return binary_op_t(ast_op_and, xpath_type_boolean, 2);
  9578. else if (lexer.contents() == PUGIXML_TEXT("div"))
  9579. return binary_op_t(ast_op_divide, xpath_type_number, 6);
  9580. else if (lexer.contents() == PUGIXML_TEXT("mod"))
  9581. return binary_op_t(ast_op_mod, xpath_type_number, 6);
  9582. else
  9583. return binary_op_t();
  9584. case lex_equal:
  9585. return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
  9586. case lex_not_equal:
  9587. return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
  9588. case lex_less:
  9589. return binary_op_t(ast_op_less, xpath_type_boolean, 4);
  9590. case lex_greater:
  9591. return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
  9592. case lex_less_or_equal:
  9593. return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
  9594. case lex_greater_or_equal:
  9595. return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
  9596. case lex_plus:
  9597. return binary_op_t(ast_op_add, xpath_type_number, 5);
  9598. case lex_minus:
  9599. return binary_op_t(ast_op_subtract, xpath_type_number, 5);
  9600. case lex_multiply:
  9601. return binary_op_t(ast_op_multiply, xpath_type_number, 6);
  9602. case lex_union:
  9603. return binary_op_t(ast_op_union, xpath_type_node_set, 7);
  9604. default:
  9605. return binary_op_t();
  9606. }
  9607. }
  9608. };
  9609. xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
  9610. {
  9611. binary_op_t op = binary_op_t::parse(_lexer);
  9612. while (op.asttype != ast_unknown && op.precedence >= limit)
  9613. {
  9614. _lexer.next();
  9615. if (++_depth > xpath_ast_depth_limit)
  9616. return error_rec();
  9617. xpath_ast_node* rhs = parse_path_or_unary_expression();
  9618. if (!rhs) return 0;
  9619. binary_op_t nextop = binary_op_t::parse(_lexer);
  9620. while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
  9621. {
  9622. rhs = parse_expression_rec(rhs, nextop.precedence);
  9623. if (!rhs) return 0;
  9624. nextop = binary_op_t::parse(_lexer);
  9625. }
  9626. if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
  9627. return error("Union operator has to be applied to node sets");
  9628. lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
  9629. if (!lhs) return 0;
  9630. op = binary_op_t::parse(_lexer);
  9631. }
  9632. return lhs;
  9633. }
  9634. // Expr ::= OrExpr
  9635. // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
  9636. // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
  9637. // EqualityExpr ::= RelationalExpr
  9638. // | EqualityExpr '=' RelationalExpr
  9639. // | EqualityExpr '!=' RelationalExpr
  9640. // RelationalExpr ::= AdditiveExpr
  9641. // | RelationalExpr '<' AdditiveExpr
  9642. // | RelationalExpr '>' AdditiveExpr
  9643. // | RelationalExpr '<=' AdditiveExpr
  9644. // | RelationalExpr '>=' AdditiveExpr
  9645. // AdditiveExpr ::= MultiplicativeExpr
  9646. // | AdditiveExpr '+' MultiplicativeExpr
  9647. // | AdditiveExpr '-' MultiplicativeExpr
  9648. // MultiplicativeExpr ::= UnaryExpr
  9649. // | MultiplicativeExpr '*' UnaryExpr
  9650. // | MultiplicativeExpr 'div' UnaryExpr
  9651. // | MultiplicativeExpr 'mod' UnaryExpr
  9652. xpath_ast_node* parse_expression(int limit = 0)
  9653. {
  9654. size_t old_depth = _depth;
  9655. if (++_depth > xpath_ast_depth_limit)
  9656. return error_rec();
  9657. xpath_ast_node* n = parse_path_or_unary_expression();
  9658. if (!n) return 0;
  9659. n = parse_expression_rec(n, limit);
  9660. _depth = old_depth;
  9661. return n;
  9662. }
  9663. xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
  9664. {
  9665. }
  9666. xpath_ast_node* parse()
  9667. {
  9668. xpath_ast_node* n = parse_expression();
  9669. if (!n) return 0;
  9670. assert(_depth == 0);
  9671. // check if there are unparsed tokens left
  9672. if (_lexer.current() != lex_eof)
  9673. return error("Incorrect query");
  9674. return n;
  9675. }
  9676. static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
  9677. {
  9678. xpath_parser parser(query, variables, alloc, result);
  9679. return parser.parse();
  9680. }
  9681. };
  9682. struct xpath_query_impl
  9683. {
  9684. static xpath_query_impl* create()
  9685. {
  9686. void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
  9687. if (!memory) return 0;
  9688. return new (memory) xpath_query_impl();
  9689. }
  9690. static void destroy(xpath_query_impl* impl)
  9691. {
  9692. // free all allocated pages
  9693. impl->alloc.release();
  9694. // free allocator memory (with the first page)
  9695. xml_memory::deallocate(impl);
  9696. }
  9697. xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
  9698. {
  9699. block.next = 0;
  9700. block.capacity = sizeof(block.data);
  9701. }
  9702. xpath_ast_node* root;
  9703. xpath_allocator alloc;
  9704. xpath_memory_block block;
  9705. bool oom;
  9706. };
  9707. PUGI_IMPL_FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
  9708. {
  9709. if (!impl) return 0;
  9710. if (impl->root->rettype() != xpath_type_node_set)
  9711. {
  9712. #ifdef PUGIXML_NO_EXCEPTIONS
  9713. return 0;
  9714. #else
  9715. xpath_parse_result res;
  9716. res.error = "Expression does not evaluate to node set";
  9717. throw xpath_exception(res);
  9718. #endif
  9719. }
  9720. return impl->root;
  9721. }
  9722. PUGI_IMPL_NS_END
  9723. namespace pugi
  9724. {
  9725. #ifndef PUGIXML_NO_EXCEPTIONS
  9726. PUGI_IMPL_FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
  9727. {
  9728. assert(_result.error);
  9729. }
  9730. PUGI_IMPL_FN const char* xpath_exception::what() const throw()
  9731. {
  9732. return _result.error;
  9733. }
  9734. PUGI_IMPL_FN const xpath_parse_result& xpath_exception::result() const
  9735. {
  9736. return _result;
  9737. }
  9738. #endif
  9739. PUGI_IMPL_FN xpath_node::xpath_node()
  9740. {
  9741. }
  9742. PUGI_IMPL_FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
  9743. {
  9744. }
  9745. PUGI_IMPL_FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
  9746. {
  9747. }
  9748. PUGI_IMPL_FN xml_node xpath_node::node() const
  9749. {
  9750. return _attribute ? xml_node() : _node;
  9751. }
  9752. PUGI_IMPL_FN xml_attribute xpath_node::attribute() const
  9753. {
  9754. return _attribute;
  9755. }
  9756. PUGI_IMPL_FN xml_node xpath_node::parent() const
  9757. {
  9758. return _attribute ? _node : _node.parent();
  9759. }
  9760. PUGI_IMPL_FN static void unspecified_bool_xpath_node(xpath_node***)
  9761. {
  9762. }
  9763. PUGI_IMPL_FN xpath_node::operator xpath_node::unspecified_bool_type() const
  9764. {
  9765. return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
  9766. }
  9767. PUGI_IMPL_FN bool xpath_node::operator!() const
  9768. {
  9769. return !(_node || _attribute);
  9770. }
  9771. PUGI_IMPL_FN bool xpath_node::operator==(const xpath_node& n) const
  9772. {
  9773. return _node == n._node && _attribute == n._attribute;
  9774. }
  9775. PUGI_IMPL_FN bool xpath_node::operator!=(const xpath_node& n) const
  9776. {
  9777. return _node != n._node || _attribute != n._attribute;
  9778. }
  9779. #ifdef __BORLANDC__
  9780. PUGI_IMPL_FN bool operator&&(const xpath_node& lhs, bool rhs)
  9781. {
  9782. return (bool)lhs && rhs;
  9783. }
  9784. PUGI_IMPL_FN bool operator||(const xpath_node& lhs, bool rhs)
  9785. {
  9786. return (bool)lhs || rhs;
  9787. }
  9788. #endif
  9789. PUGI_IMPL_FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
  9790. {
  9791. assert(begin_ <= end_);
  9792. size_t size_ = static_cast<size_t>(end_ - begin_);
  9793. // use internal buffer for 0 or 1 elements, heap buffer otherwise
  9794. xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
  9795. if (!storage)
  9796. {
  9797. #ifdef PUGIXML_NO_EXCEPTIONS
  9798. return;
  9799. #else
  9800. throw std::bad_alloc();
  9801. #endif
  9802. }
  9803. // deallocate old buffer
  9804. if (_begin != _storage)
  9805. impl::xml_memory::deallocate(_begin);
  9806. // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
  9807. if (size_)
  9808. memcpy(storage, begin_, size_ * sizeof(xpath_node));
  9809. _begin = storage;
  9810. _end = storage + size_;
  9811. _type = type_;
  9812. }
  9813. #ifdef PUGIXML_HAS_MOVE
  9814. PUGI_IMPL_FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
  9815. {
  9816. _type = rhs._type;
  9817. _storage[0] = rhs._storage[0];
  9818. _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
  9819. _end = _begin + (rhs._end - rhs._begin);
  9820. rhs._type = type_unsorted;
  9821. rhs._begin = rhs._storage;
  9822. rhs._end = rhs._storage;
  9823. }
  9824. #endif
  9825. PUGI_IMPL_FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
  9826. {
  9827. }
  9828. PUGI_IMPL_FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
  9829. {
  9830. _assign(begin_, end_, type_);
  9831. }
  9832. PUGI_IMPL_FN xpath_node_set::~xpath_node_set()
  9833. {
  9834. if (_begin != _storage)
  9835. impl::xml_memory::deallocate(_begin);
  9836. }
  9837. PUGI_IMPL_FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
  9838. {
  9839. _assign(ns._begin, ns._end, ns._type);
  9840. }
  9841. PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
  9842. {
  9843. if (this == &ns) return *this;
  9844. _assign(ns._begin, ns._end, ns._type);
  9845. return *this;
  9846. }
  9847. #ifdef PUGIXML_HAS_MOVE
  9848. PUGI_IMPL_FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
  9849. {
  9850. _move(rhs);
  9851. }
  9852. PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
  9853. {
  9854. if (this == &rhs) return *this;
  9855. if (_begin != _storage)
  9856. impl::xml_memory::deallocate(_begin);
  9857. _move(rhs);
  9858. return *this;
  9859. }
  9860. #endif
  9861. PUGI_IMPL_FN xpath_node_set::type_t xpath_node_set::type() const
  9862. {
  9863. return _type;
  9864. }
  9865. PUGI_IMPL_FN size_t xpath_node_set::size() const
  9866. {
  9867. return _end - _begin;
  9868. }
  9869. PUGI_IMPL_FN bool xpath_node_set::empty() const
  9870. {
  9871. return _begin == _end;
  9872. }
  9873. PUGI_IMPL_FN const xpath_node& xpath_node_set::operator[](size_t index) const
  9874. {
  9875. assert(index < size());
  9876. return _begin[index];
  9877. }
  9878. PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::begin() const
  9879. {
  9880. return _begin;
  9881. }
  9882. PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::end() const
  9883. {
  9884. return _end;
  9885. }
  9886. PUGI_IMPL_FN void xpath_node_set::sort(bool reverse)
  9887. {
  9888. _type = impl::xpath_sort(_begin, _end, _type, reverse);
  9889. }
  9890. PUGI_IMPL_FN xpath_node xpath_node_set::first() const
  9891. {
  9892. return impl::xpath_first(_begin, _end, _type);
  9893. }
  9894. PUGI_IMPL_FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
  9895. {
  9896. }
  9897. PUGI_IMPL_FN xpath_parse_result::operator bool() const
  9898. {
  9899. return error == 0;
  9900. }
  9901. PUGI_IMPL_FN const char* xpath_parse_result::description() const
  9902. {
  9903. return error ? error : "No error";
  9904. }
  9905. PUGI_IMPL_FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
  9906. {
  9907. }
  9908. PUGI_IMPL_FN const char_t* xpath_variable::name() const
  9909. {
  9910. switch (_type)
  9911. {
  9912. case xpath_type_node_set:
  9913. return static_cast<const impl::xpath_variable_node_set*>(this)->name;
  9914. case xpath_type_number:
  9915. return static_cast<const impl::xpath_variable_number*>(this)->name;
  9916. case xpath_type_string:
  9917. return static_cast<const impl::xpath_variable_string*>(this)->name;
  9918. case xpath_type_boolean:
  9919. return static_cast<const impl::xpath_variable_boolean*>(this)->name;
  9920. default:
  9921. assert(false && "Invalid variable type"); // unreachable
  9922. return 0;
  9923. }
  9924. }
  9925. PUGI_IMPL_FN xpath_value_type xpath_variable::type() const
  9926. {
  9927. return _type;
  9928. }
  9929. PUGI_IMPL_FN bool xpath_variable::get_boolean() const
  9930. {
  9931. return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
  9932. }
  9933. PUGI_IMPL_FN double xpath_variable::get_number() const
  9934. {
  9935. return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
  9936. }
  9937. PUGI_IMPL_FN const char_t* xpath_variable::get_string() const
  9938. {
  9939. const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
  9940. return value ? value : PUGIXML_TEXT("");
  9941. }
  9942. PUGI_IMPL_FN const xpath_node_set& xpath_variable::get_node_set() const
  9943. {
  9944. return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
  9945. }
  9946. PUGI_IMPL_FN bool xpath_variable::set(bool value)
  9947. {
  9948. if (_type != xpath_type_boolean) return false;
  9949. static_cast<impl::xpath_variable_boolean*>(this)->value = value;
  9950. return true;
  9951. }
  9952. PUGI_IMPL_FN bool xpath_variable::set(double value)
  9953. {
  9954. if (_type != xpath_type_number) return false;
  9955. static_cast<impl::xpath_variable_number*>(this)->value = value;
  9956. return true;
  9957. }
  9958. PUGI_IMPL_FN bool xpath_variable::set(const char_t* value)
  9959. {
  9960. if (_type != xpath_type_string) return false;
  9961. impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
  9962. // duplicate string
  9963. size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
  9964. char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
  9965. if (!copy) return false;
  9966. memcpy(copy, value, size);
  9967. // replace old string
  9968. if (var->value) impl::xml_memory::deallocate(var->value);
  9969. var->value = copy;
  9970. return true;
  9971. }
  9972. PUGI_IMPL_FN bool xpath_variable::set(const xpath_node_set& value)
  9973. {
  9974. if (_type != xpath_type_node_set) return false;
  9975. static_cast<impl::xpath_variable_node_set*>(this)->value = value;
  9976. return true;
  9977. }
  9978. PUGI_IMPL_FN xpath_variable_set::xpath_variable_set()
  9979. {
  9980. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9981. _data[i] = 0;
  9982. }
  9983. PUGI_IMPL_FN xpath_variable_set::~xpath_variable_set()
  9984. {
  9985. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9986. _destroy(_data[i]);
  9987. }
  9988. PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
  9989. {
  9990. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9991. _data[i] = 0;
  9992. _assign(rhs);
  9993. }
  9994. PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
  9995. {
  9996. if (this == &rhs) return *this;
  9997. _assign(rhs);
  9998. return *this;
  9999. }
  10000. #ifdef PUGIXML_HAS_MOVE
  10001. PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  10002. {
  10003. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  10004. {
  10005. _data[i] = rhs._data[i];
  10006. rhs._data[i] = 0;
  10007. }
  10008. }
  10009. PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  10010. {
  10011. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  10012. {
  10013. _destroy(_data[i]);
  10014. _data[i] = rhs._data[i];
  10015. rhs._data[i] = 0;
  10016. }
  10017. return *this;
  10018. }
  10019. #endif
  10020. PUGI_IMPL_FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
  10021. {
  10022. xpath_variable_set temp;
  10023. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  10024. if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
  10025. return;
  10026. _swap(temp);
  10027. }
  10028. PUGI_IMPL_FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
  10029. {
  10030. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  10031. {
  10032. xpath_variable* chain = _data[i];
  10033. _data[i] = rhs._data[i];
  10034. rhs._data[i] = chain;
  10035. }
  10036. }
  10037. PUGI_IMPL_FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
  10038. {
  10039. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  10040. size_t hash = impl::hash_string(name) % hash_size;
  10041. // look for existing variable
  10042. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  10043. if (impl::strequal(var->name(), name))
  10044. return var;
  10045. return 0;
  10046. }
  10047. PUGI_IMPL_FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
  10048. {
  10049. xpath_variable* last = 0;
  10050. while (var)
  10051. {
  10052. // allocate storage for new variable
  10053. xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
  10054. if (!nvar) return false;
  10055. // link the variable to the result immediately to handle failures gracefully
  10056. if (last)
  10057. last->_next = nvar;
  10058. else
  10059. *out_result = nvar;
  10060. last = nvar;
  10061. // copy the value; this can fail due to out-of-memory conditions
  10062. if (!impl::copy_xpath_variable(nvar, var)) return false;
  10063. var = var->_next;
  10064. }
  10065. return true;
  10066. }
  10067. PUGI_IMPL_FN void xpath_variable_set::_destroy(xpath_variable* var)
  10068. {
  10069. while (var)
  10070. {
  10071. xpath_variable* next = var->_next;
  10072. impl::delete_xpath_variable(var->_type, var);
  10073. var = next;
  10074. }
  10075. }
  10076. PUGI_IMPL_FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
  10077. {
  10078. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  10079. size_t hash = impl::hash_string(name) % hash_size;
  10080. // look for existing variable
  10081. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  10082. if (impl::strequal(var->name(), name))
  10083. return var->type() == type ? var : 0;
  10084. // add new variable
  10085. xpath_variable* result = impl::new_xpath_variable(type, name);
  10086. if (result)
  10087. {
  10088. result->_next = _data[hash];
  10089. _data[hash] = result;
  10090. }
  10091. return result;
  10092. }
  10093. PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, bool value)
  10094. {
  10095. xpath_variable* var = add(name, xpath_type_boolean);
  10096. return var ? var->set(value) : false;
  10097. }
  10098. PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, double value)
  10099. {
  10100. xpath_variable* var = add(name, xpath_type_number);
  10101. return var ? var->set(value) : false;
  10102. }
  10103. PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
  10104. {
  10105. xpath_variable* var = add(name, xpath_type_string);
  10106. return var ? var->set(value) : false;
  10107. }
  10108. PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
  10109. {
  10110. xpath_variable* var = add(name, xpath_type_node_set);
  10111. return var ? var->set(value) : false;
  10112. }
  10113. PUGI_IMPL_FN xpath_variable* xpath_variable_set::get(const char_t* name)
  10114. {
  10115. return _find(name);
  10116. }
  10117. PUGI_IMPL_FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
  10118. {
  10119. return _find(name);
  10120. }
  10121. PUGI_IMPL_FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
  10122. {
  10123. impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
  10124. if (!qimpl)
  10125. {
  10126. #ifdef PUGIXML_NO_EXCEPTIONS
  10127. _result.error = "Out of memory";
  10128. #else
  10129. throw std::bad_alloc();
  10130. #endif
  10131. }
  10132. else
  10133. {
  10134. using impl::auto_deleter; // MSVC7 workaround
  10135. auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
  10136. qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
  10137. if (qimpl->root)
  10138. {
  10139. qimpl->root->optimize(&qimpl->alloc);
  10140. _impl = impl.release();
  10141. _result.error = 0;
  10142. }
  10143. else
  10144. {
  10145. #ifdef PUGIXML_NO_EXCEPTIONS
  10146. if (qimpl->oom) _result.error = "Out of memory";
  10147. #else
  10148. if (qimpl->oom) throw std::bad_alloc();
  10149. throw xpath_exception(_result);
  10150. #endif
  10151. }
  10152. }
  10153. }
  10154. PUGI_IMPL_FN xpath_query::xpath_query(): _impl(0)
  10155. {
  10156. }
  10157. PUGI_IMPL_FN xpath_query::~xpath_query()
  10158. {
  10159. if (_impl)
  10160. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  10161. }
  10162. #ifdef PUGIXML_HAS_MOVE
  10163. PUGI_IMPL_FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
  10164. {
  10165. _impl = rhs._impl;
  10166. _result = rhs._result;
  10167. rhs._impl = 0;
  10168. rhs._result = xpath_parse_result();
  10169. }
  10170. PUGI_IMPL_FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
  10171. {
  10172. if (this == &rhs) return *this;
  10173. if (_impl)
  10174. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  10175. _impl = rhs._impl;
  10176. _result = rhs._result;
  10177. rhs._impl = 0;
  10178. rhs._result = xpath_parse_result();
  10179. return *this;
  10180. }
  10181. #endif
  10182. PUGI_IMPL_FN xpath_value_type xpath_query::return_type() const
  10183. {
  10184. if (!_impl) return xpath_type_none;
  10185. return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
  10186. }
  10187. PUGI_IMPL_FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
  10188. {
  10189. if (!_impl) return false;
  10190. impl::xpath_context c(n, 1, 1);
  10191. impl::xpath_stack_data sd;
  10192. bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
  10193. if (sd.oom)
  10194. {
  10195. #ifdef PUGIXML_NO_EXCEPTIONS
  10196. return false;
  10197. #else
  10198. throw std::bad_alloc();
  10199. #endif
  10200. }
  10201. return r;
  10202. }
  10203. PUGI_IMPL_FN double xpath_query::evaluate_number(const xpath_node& n) const
  10204. {
  10205. if (!_impl) return impl::gen_nan();
  10206. impl::xpath_context c(n, 1, 1);
  10207. impl::xpath_stack_data sd;
  10208. double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
  10209. if (sd.oom)
  10210. {
  10211. #ifdef PUGIXML_NO_EXCEPTIONS
  10212. return impl::gen_nan();
  10213. #else
  10214. throw std::bad_alloc();
  10215. #endif
  10216. }
  10217. return r;
  10218. }
  10219. #ifndef PUGIXML_NO_STL
  10220. PUGI_IMPL_FN string_t xpath_query::evaluate_string(const xpath_node& n) const
  10221. {
  10222. if (!_impl) return string_t();
  10223. impl::xpath_context c(n, 1, 1);
  10224. impl::xpath_stack_data sd;
  10225. impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
  10226. if (sd.oom)
  10227. {
  10228. #ifdef PUGIXML_NO_EXCEPTIONS
  10229. return string_t();
  10230. #else
  10231. throw std::bad_alloc();
  10232. #endif
  10233. }
  10234. return string_t(r.c_str(), r.length());
  10235. }
  10236. #endif
  10237. PUGI_IMPL_FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
  10238. {
  10239. impl::xpath_context c(n, 1, 1);
  10240. impl::xpath_stack_data sd;
  10241. impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
  10242. if (sd.oom)
  10243. {
  10244. #ifdef PUGIXML_NO_EXCEPTIONS
  10245. r = impl::xpath_string();
  10246. #else
  10247. throw std::bad_alloc();
  10248. #endif
  10249. }
  10250. size_t full_size = r.length() + 1;
  10251. if (capacity > 0)
  10252. {
  10253. size_t size = (full_size < capacity) ? full_size : capacity;
  10254. assert(size > 0);
  10255. memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
  10256. buffer[size - 1] = 0;
  10257. }
  10258. return full_size;
  10259. }
  10260. PUGI_IMPL_FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
  10261. {
  10262. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10263. if (!root) return xpath_node_set();
  10264. impl::xpath_context c(n, 1, 1);
  10265. impl::xpath_stack_data sd;
  10266. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
  10267. if (sd.oom)
  10268. {
  10269. #ifdef PUGIXML_NO_EXCEPTIONS
  10270. return xpath_node_set();
  10271. #else
  10272. throw std::bad_alloc();
  10273. #endif
  10274. }
  10275. return xpath_node_set(r.begin(), r.end(), r.type());
  10276. }
  10277. PUGI_IMPL_FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
  10278. {
  10279. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10280. if (!root) return xpath_node();
  10281. impl::xpath_context c(n, 1, 1);
  10282. impl::xpath_stack_data sd;
  10283. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
  10284. if (sd.oom)
  10285. {
  10286. #ifdef PUGIXML_NO_EXCEPTIONS
  10287. return xpath_node();
  10288. #else
  10289. throw std::bad_alloc();
  10290. #endif
  10291. }
  10292. return r.first();
  10293. }
  10294. PUGI_IMPL_FN const xpath_parse_result& xpath_query::result() const
  10295. {
  10296. return _result;
  10297. }
  10298. PUGI_IMPL_FN static void unspecified_bool_xpath_query(xpath_query***)
  10299. {
  10300. }
  10301. PUGI_IMPL_FN xpath_query::operator xpath_query::unspecified_bool_type() const
  10302. {
  10303. return _impl ? unspecified_bool_xpath_query : 0;
  10304. }
  10305. PUGI_IMPL_FN bool xpath_query::operator!() const
  10306. {
  10307. return !_impl;
  10308. }
  10309. PUGI_IMPL_FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
  10310. {
  10311. xpath_query q(query, variables);
  10312. return q.evaluate_node(*this);
  10313. }
  10314. PUGI_IMPL_FN xpath_node xml_node::select_node(const xpath_query& query) const
  10315. {
  10316. return query.evaluate_node(*this);
  10317. }
  10318. PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
  10319. {
  10320. xpath_query q(query, variables);
  10321. return q.evaluate_node_set(*this);
  10322. }
  10323. PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
  10324. {
  10325. return query.evaluate_node_set(*this);
  10326. }
  10327. PUGI_IMPL_FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
  10328. {
  10329. xpath_query q(query, variables);
  10330. return q.evaluate_node(*this);
  10331. }
  10332. PUGI_IMPL_FN xpath_node xml_node::select_single_node(const xpath_query& query) const
  10333. {
  10334. return query.evaluate_node(*this);
  10335. }
  10336. }
  10337. #endif
  10338. #ifdef __BORLANDC__
  10339. # pragma option pop
  10340. #endif
  10341. // Intel C++ does not properly keep warning state for function templates,
  10342. // so popping warning state at the end of translation unit leads to warnings in the middle.
  10343. #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
  10344. # pragma warning(pop)
  10345. #endif
  10346. #if defined(_MSC_VER) && defined(__c2__)
  10347. # pragma clang diagnostic pop
  10348. #endif
  10349. // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
  10350. #undef PUGI_IMPL_NO_INLINE
  10351. #undef PUGI_IMPL_UNLIKELY
  10352. #undef PUGI_IMPL_STATIC_ASSERT
  10353. #undef PUGI_IMPL_DMC_VOLATILE
  10354. #undef PUGI_IMPL_UNSIGNED_OVERFLOW
  10355. #undef PUGI_IMPL_MSVC_CRT_VERSION
  10356. #undef PUGI_IMPL_SNPRINTF
  10357. #undef PUGI_IMPL_NS_BEGIN
  10358. #undef PUGI_IMPL_NS_END
  10359. #undef PUGI_IMPL_FN
  10360. #undef PUGI_IMPL_FN_NO_INLINE
  10361. #undef PUGI_IMPL_GETHEADER_IMPL
  10362. #undef PUGI_IMPL_GETPAGE_IMPL
  10363. #undef PUGI_IMPL_GETPAGE
  10364. #undef PUGI_IMPL_NODETYPE
  10365. #undef PUGI_IMPL_IS_CHARTYPE_IMPL
  10366. #undef PUGI_IMPL_IS_CHARTYPE
  10367. #undef PUGI_IMPL_IS_CHARTYPEX
  10368. #undef PUGI_IMPL_ENDSWITH
  10369. #undef PUGI_IMPL_SKIPWS
  10370. #undef PUGI_IMPL_OPTSET
  10371. #undef PUGI_IMPL_PUSHNODE
  10372. #undef PUGI_IMPL_POPNODE
  10373. #undef PUGI_IMPL_SCANFOR
  10374. #undef PUGI_IMPL_SCANWHILE
  10375. #undef PUGI_IMPL_SCANWHILE_UNROLL
  10376. #undef PUGI_IMPL_ENDSEG
  10377. #undef PUGI_IMPL_THROW_ERROR
  10378. #undef PUGI_IMPL_CHECK_ERROR
  10379. #endif
  10380. /**
  10381. * Copyright (c) 2006-2023 Arseny Kapoulkine
  10382. *
  10383. * Permission is hereby granted, free of charge, to any person
  10384. * obtaining a copy of this software and associated documentation
  10385. * files (the "Software"), to deal in the Software without
  10386. * restriction, including without limitation the rights to use,
  10387. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10388. * copies of the Software, and to permit persons to whom the
  10389. * Software is furnished to do so, subject to the following
  10390. * conditions:
  10391. *
  10392. * The above copyright notice and this permission notice shall be
  10393. * included in all copies or substantial portions of the Software.
  10394. *
  10395. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  10396. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  10397. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  10398. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  10399. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  10400. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  10401. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  10402. * OTHER DEALINGS IN THE SOFTWARE.
  10403. */