Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1270 lines
31KB

  1. /*
  2. Copyright 2011-2014 David Robillard <http://drobilla.net>
  3. Permission to use, copy, modify, and/or distribute this software for any
  4. purpose with or without fee is hereby granted, provided that the above
  5. copyright notice and this permission notice appear in all copies.
  6. THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  7. WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  8. MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  9. ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  10. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  11. ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  12. OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  13. */
  14. // C99
  15. #include <assert.h>
  16. #include <errno.h>
  17. #include <stdint.h>
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #define ZIX_INLINE
  22. #include "zix/digest.c"
  23. #include "zix/hash.c"
  24. #include "zix/btree.c"
  25. #include "sord_config.h"
  26. #include "sord_internal.h"
  27. #define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
  28. #ifdef SORD_DEBUG_ITER
  29. # define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
  30. #else
  31. # define SORD_ITER_LOG(...)
  32. #endif
  33. #ifdef SORD_DEBUG_SEARCH
  34. # define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
  35. #else
  36. # define SORD_FIND_LOG(...)
  37. #endif
  38. #ifdef SORD_DEBUG_WRITE
  39. # define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
  40. #else
  41. # define SORD_WRITE_LOG(...)
  42. #endif
  43. #define NUM_ORDERS 12
  44. #define STATEMENT_LEN 3
  45. #define TUP_LEN STATEMENT_LEN + 1
  46. #define DEFAULT_ORDER SPO
  47. #define DEFAULT_GRAPH_ORDER GSPO
  48. #define TUP_FMT "(%s %s %s %s)"
  49. #define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*")
  50. #define TUP_FMT_ARGS(t) \
  51. TUP_FMT_ELEM((t)[0]), \
  52. TUP_FMT_ELEM((t)[1]), \
  53. TUP_FMT_ELEM((t)[2]), \
  54. TUP_FMT_ELEM((t)[3])
  55. #define TUP_S 0
  56. #define TUP_P 1
  57. #define TUP_O 2
  58. #define TUP_G 3
  59. /** Triple ordering */
  60. typedef enum {
  61. SPO, ///< Subject, Predicate, Object
  62. SOP, ///< Subject, Object, Predicate
  63. OPS, ///< Object, Predicate, Subject
  64. OSP, ///< Object, Subject, Predicate
  65. PSO, ///< Predicate, Subject, Object
  66. POS, ///< Predicate, Object, Subject
  67. GSPO, ///< Graph, Subject, Predicate, Object
  68. GSOP, ///< Graph, Subject, Object, Predicate
  69. GOPS, ///< Graph, Object, Predicate, Subject
  70. GOSP, ///< Graph, Object, Subject, Predicate
  71. GPSO, ///< Graph, Predicate, Subject, Object
  72. GPOS, ///< Graph, Predicate, Object, Subject
  73. } SordOrder;
  74. /** String name of each ordering (array indexed by SordOrder) */
  75. static const char* const order_names[NUM_ORDERS] = {
  76. "spo", "sop", "ops", "osp", "pso", "pos",
  77. "gspo", "gsop", "gops", "gosp", "gpso", "gpos"
  78. };
  79. /**
  80. Quads of indices for each order, from most to least significant
  81. (array indexed by SordOrder)
  82. */
  83. static const int orderings[NUM_ORDERS][TUP_LEN] = {
  84. { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP
  85. { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP
  86. { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS
  87. { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP
  88. { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP
  89. { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS
  90. };
  91. /** World */
  92. struct SordWorldImpl {
  93. ZixHash* nodes;
  94. SerdErrorSink error_sink;
  95. void* error_handle;
  96. };
  97. /** Store */
  98. struct SordModelImpl {
  99. SordWorld* world;
  100. /** Index for each possible triple ordering (may or may not exist).
  101. * Each index is a tree of SordQuad with the appropriate ordering.
  102. */
  103. ZixBTree* indices[NUM_ORDERS];
  104. size_t n_quads;
  105. size_t n_iters;
  106. };
  107. /** Mode for searching or iteration */
  108. typedef enum {
  109. ALL, ///< Iterate over entire store
  110. SINGLE, ///< Iteration over a single element (exact search)
  111. RANGE, ///< Iterate over range with equal prefix
  112. FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
  113. FILTER_ALL ///< Iterate to end of store, filtering
  114. } SearchMode;
  115. /** Iterator over some range of a store */
  116. struct SordIterImpl {
  117. const SordModel* sord; ///< Model being iterated over
  118. ZixBTreeIter* cur; ///< Current DB cursor
  119. SordQuad pat; ///< Pattern (in ordering order)
  120. SordOrder order; ///< Store order (which index)
  121. SearchMode mode; ///< Iteration mode
  122. int n_prefix; ///< Prefix for RANGE and FILTER_RANGE
  123. bool end; ///< True iff reached end
  124. bool skip_graphs; ///< Iteration should ignore graphs
  125. };
  126. static uint32_t
  127. sord_node_hash(const void* n)
  128. {
  129. const SordNode* node = (const SordNode*)n;
  130. uint32_t hash = zix_digest_start();
  131. hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes);
  132. hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type));
  133. if (node->node.type == SERD_LITERAL) {
  134. hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit));
  135. }
  136. return hash;
  137. }
  138. static bool
  139. sord_node_hash_equal(const void* a, const void* b)
  140. {
  141. const SordNode* a_node = (const SordNode*)a;
  142. const SordNode* b_node = (const SordNode*)b;
  143. return (a_node == b_node)
  144. || ((a_node->node.type == b_node->node.type) &&
  145. (a_node->node.type != SERD_LITERAL ||
  146. (a_node->meta.lit.datatype == b_node->meta.lit.datatype &&
  147. !strncmp(a_node->meta.lit.lang,
  148. b_node->meta.lit.lang,
  149. sizeof(a_node->meta.lit.lang)))) &&
  150. (serd_node_equals(&a_node->node, &b_node->node)));
  151. }
  152. static void
  153. error(SordWorld* world, SerdStatus st, const char* fmt, ...)
  154. {
  155. va_list args;
  156. va_start(args, fmt);
  157. const SerdError e = { st, NULL, 0, 0, fmt, &args };
  158. if (world->error_sink) {
  159. world->error_sink(world->error_handle, &e);
  160. } else {
  161. fprintf(stderr, "error: ");
  162. vfprintf(stderr, fmt, args);
  163. }
  164. va_end(args);
  165. }
  166. SordWorld*
  167. sord_world_new(void)
  168. {
  169. SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld));
  170. world->error_sink = NULL;
  171. world->error_handle = NULL;
  172. world->nodes = zix_hash_new(
  173. sord_node_hash, sord_node_hash_equal, sizeof(SordNode));
  174. return world;
  175. }
  176. static void
  177. free_node_entry(void* value, void* user_data)
  178. {
  179. SordNode* node = (SordNode*)value;
  180. if (node->node.type == SERD_LITERAL) {
  181. sord_node_free((SordWorld*)user_data, node->meta.lit.datatype);
  182. }
  183. free((uint8_t*)node->node.buf);
  184. }
  185. void
  186. sord_world_free(SordWorld* world)
  187. {
  188. zix_hash_foreach(world->nodes, free_node_entry, world);
  189. zix_hash_free(world->nodes);
  190. free(world);
  191. }
  192. void
  193. sord_world_set_error_sink(SordWorld* world,
  194. SerdErrorSink error_sink,
  195. void* handle)
  196. {
  197. world->error_sink = error_sink;
  198. world->error_handle = handle;
  199. }
  200. /** Compare nodes, considering NULL a wildcard match. */
  201. static inline int
  202. sord_node_compare(const SordNode* a, const SordNode* b)
  203. {
  204. if (a == b || !a || !b) {
  205. return 0; // Exact or wildcard match
  206. } else if (a->node.type != b->node.type) {
  207. return a->node.type - b->node.type;
  208. }
  209. int cmp = 0;
  210. switch (a->node.type) {
  211. case SERD_URI:
  212. case SERD_BLANK:
  213. return strcmp((const char*)a->node.buf, (const char*)b->node.buf);
  214. case SERD_LITERAL:
  215. cmp = strcmp((const char*)sord_node_get_string(a),
  216. (const char*)sord_node_get_string(b));
  217. if (cmp == 0) {
  218. // Note: Can't use sord_node_compare here since it does wildcards
  219. if (!a->meta.lit.datatype || !b->meta.lit.datatype) {
  220. cmp = a->meta.lit.datatype - b->meta.lit.datatype;
  221. } else {
  222. cmp = strcmp((const char*)a->meta.lit.datatype->node.buf,
  223. (const char*)b->meta.lit.datatype->node.buf);
  224. }
  225. }
  226. if (cmp == 0) {
  227. cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang);
  228. }
  229. default:
  230. break;
  231. }
  232. return cmp;
  233. }
  234. bool
  235. sord_node_equals(const SordNode* a, const SordNode* b)
  236. {
  237. return a == b; // Nodes are interned
  238. }
  239. /** Return true iff IDs are equivalent, or one is a wildcard */
  240. static inline bool
  241. sord_id_match(const SordNode* a, const SordNode* b)
  242. {
  243. return !a || !b || (a == b);
  244. }
  245. static inline bool
  246. sord_quad_match_inline(const SordQuad x, const SordQuad y)
  247. {
  248. return sord_id_match(x[0], y[0])
  249. && sord_id_match(x[1], y[1])
  250. && sord_id_match(x[2], y[2])
  251. && sord_id_match(x[3], y[3]);
  252. }
  253. bool
  254. sord_quad_match(const SordQuad x, const SordQuad y)
  255. {
  256. return sord_quad_match_inline(x, y);
  257. }
  258. /**
  259. Compare two quad IDs lexicographically.
  260. NULL IDs (equal to 0) are treated as wildcards, always less than every
  261. other possible ID, except itself.
  262. */
  263. static int
  264. sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data)
  265. {
  266. const int* const ordering = (const int*)user_data;
  267. const SordNode*const*const x = (const SordNode*const*)x_ptr;
  268. const SordNode*const*const y = (const SordNode*const*)y_ptr;
  269. for (int i = 0; i < TUP_LEN; ++i) {
  270. const int idx = ordering[i];
  271. const int cmp = sord_node_compare(x[idx], y[idx]);
  272. if (cmp) {
  273. return cmp;
  274. }
  275. }
  276. return 0;
  277. }
  278. static inline bool
  279. sord_iter_forward(SordIter* iter)
  280. {
  281. if (!iter->skip_graphs) {
  282. zix_btree_iter_increment(iter->cur);
  283. return zix_btree_iter_is_end(iter->cur);
  284. }
  285. SordNode** key = (SordNode**)zix_btree_get(iter->cur);
  286. const SordQuad initial = { key[0], key[1], key[2], key[3] };
  287. while (true) {
  288. zix_btree_iter_increment(iter->cur);
  289. if (zix_btree_iter_is_end(iter->cur))
  290. return true;
  291. key = (SordNode**)zix_btree_get(iter->cur);
  292. for (int i = 0; i < 3; ++i)
  293. if (key[i] != initial[i])
  294. return false;
  295. }
  296. assert(false);
  297. }
  298. /**
  299. Seek forward as necessary until `iter` points at a match.
  300. @return true iff iterator reached end of valid range.
  301. */
  302. static inline bool
  303. sord_iter_seek_match(SordIter* iter)
  304. {
  305. for (iter->end = true;
  306. !zix_btree_iter_is_end(iter->cur);
  307. sord_iter_forward(iter)) {
  308. const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur);
  309. if (sord_quad_match_inline(key, iter->pat))
  310. return (iter->end = false);
  311. }
  312. return true;
  313. }
  314. /**
  315. Seek forward as necessary until `iter` points at a match, or the prefix
  316. no longer matches iter->pat.
  317. @return true iff iterator reached end of valid range.
  318. */
  319. static inline bool
  320. sord_iter_seek_match_range(SordIter* iter)
  321. {
  322. if (iter->end)
  323. return true;
  324. do {
  325. const SordNode** key = (const SordNode**)zix_btree_get(iter->cur);
  326. if (sord_quad_match_inline(key, iter->pat))
  327. return false; // Found match
  328. for (int i = 0; i < iter->n_prefix; ++i) {
  329. const int idx = orderings[iter->order][i];
  330. if (!sord_id_match(key[idx], iter->pat[idx])) {
  331. iter->end = true; // Reached end of valid range
  332. return true;
  333. }
  334. }
  335. } while (!sord_iter_forward(iter));
  336. return (iter->end = true); // Reached end
  337. }
  338. static SordIter*
  339. sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat,
  340. SordOrder order, SearchMode mode, int n_prefix)
  341. {
  342. SordIter* iter = (SordIter*)malloc(sizeof(SordIter));
  343. iter->sord = sord;
  344. iter->cur = cur;
  345. iter->order = order;
  346. iter->mode = mode;
  347. iter->n_prefix = n_prefix;
  348. iter->end = false;
  349. iter->skip_graphs = order < GSPO;
  350. for (int i = 0; i < TUP_LEN; ++i) {
  351. iter->pat[i] = pat[i];
  352. }
  353. switch (iter->mode) {
  354. case ALL:
  355. case SINGLE:
  356. case RANGE:
  357. assert(
  358. sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur),
  359. iter->pat));
  360. break;
  361. case FILTER_RANGE:
  362. sord_iter_seek_match_range(iter);
  363. break;
  364. case FILTER_ALL:
  365. sord_iter_seek_match(iter);
  366. break;
  367. }
  368. #ifdef SORD_DEBUG_ITER
  369. SordQuad value;
  370. sord_iter_get(iter, value);
  371. SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n",
  372. (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value),
  373. iter->end, iter->skip_graphs);
  374. #endif
  375. ++((SordModel*)sord)->n_iters;
  376. return iter;
  377. }
  378. const SordModel*
  379. sord_iter_get_model(SordIter* iter)
  380. {
  381. return iter->sord;
  382. }
  383. void
  384. sord_iter_get(const SordIter* iter, SordQuad id)
  385. {
  386. SordNode** key = (SordNode**)zix_btree_get(iter->cur);
  387. for (int i = 0; i < TUP_LEN; ++i) {
  388. id[i] = key[i];
  389. }
  390. }
  391. const SordNode*
  392. sord_iter_get_node(const SordIter* iter, SordQuadIndex index)
  393. {
  394. return iter ? ((SordNode**)zix_btree_get(iter->cur))[index] : NULL;
  395. }
  396. bool
  397. sord_iter_next(SordIter* iter)
  398. {
  399. if (iter->end)
  400. return true;
  401. const SordNode** key;
  402. iter->end = sord_iter_forward(iter);
  403. if (!iter->end) {
  404. switch (iter->mode) {
  405. case ALL:
  406. // At the end if the cursor is (assigned above)
  407. break;
  408. case SINGLE:
  409. iter->end = true;
  410. SORD_ITER_LOG("%p reached single end\n", (void*)iter);
  411. break;
  412. case RANGE:
  413. SORD_ITER_LOG("%p range next\n", (void*)iter);
  414. // At the end if the MSNs no longer match
  415. key = (const SordNode**)zix_btree_get(iter->cur);
  416. assert(key);
  417. for (int i = 0; i < iter->n_prefix; ++i) {
  418. const int idx = orderings[iter->order][i];
  419. if (!sord_id_match(key[idx], iter->pat[idx])) {
  420. iter->end = true;
  421. SORD_ITER_LOG("%p reached non-match end\n", (void*)iter);
  422. break;
  423. }
  424. }
  425. break;
  426. case FILTER_RANGE:
  427. // Seek forward to next match, stopping if prefix changes
  428. sord_iter_seek_match_range(iter);
  429. break;
  430. case FILTER_ALL:
  431. // Seek forward to next match
  432. sord_iter_seek_match(iter);
  433. break;
  434. }
  435. } else {
  436. SORD_ITER_LOG("%p reached index end\n", (void*)iter);
  437. }
  438. if (iter->end) {
  439. SORD_ITER_LOG("%p Reached end\n", (void*)iter);
  440. return true;
  441. } else {
  442. #ifdef SORD_DEBUG_ITER
  443. SordQuad tup;
  444. sord_iter_get(iter, tup);
  445. SORD_ITER_LOG("%p Increment to " TUP_FMT "\n",
  446. (void*)iter, TUP_FMT_ARGS(tup));
  447. #endif
  448. return false;
  449. }
  450. }
  451. bool
  452. sord_iter_end(const SordIter* iter)
  453. {
  454. return !iter || iter->end;
  455. }
  456. void
  457. sord_iter_free(SordIter* iter)
  458. {
  459. SORD_ITER_LOG("%p Free\n", (void*)iter);
  460. if (iter) {
  461. --((SordModel*)iter->sord)->n_iters;
  462. zix_btree_iter_free(iter->cur);
  463. free(iter);
  464. }
  465. }
  466. /**
  467. Return true iff `sord` has an index for `order`.
  468. If `graphs` is true, `order` will be modified to be the
  469. corresponding order with a G prepended (so G will be the MSN).
  470. */
  471. static inline bool
  472. sord_has_index(SordModel* sord, SordOrder* order, int* n_prefix, bool graphs)
  473. {
  474. if (graphs) {
  475. *order = (SordOrder)(*order + GSPO);
  476. *n_prefix += 1;
  477. }
  478. return sord->indices[*order];
  479. }
  480. /**
  481. Return the best available index for a pattern.
  482. @param pat Pattern in standard (S P O G) order
  483. @param mode Set to the (best) iteration mode for iterating over results
  484. @param n_prefix Set to the length of the range prefix
  485. (for `mode` == RANGE and `mode` == FILTER_RANGE)
  486. */
  487. static inline SordOrder
  488. sord_best_index(SordModel* sord,
  489. const SordQuad pat,
  490. SearchMode* mode,
  491. int* n_prefix)
  492. {
  493. const bool graph_search = (pat[TUP_G] != 0);
  494. const unsigned sig
  495. = (pat[0] ? 1 : 0) * 0x100
  496. + (pat[1] ? 1 : 0) * 0x010
  497. + (pat[2] ? 1 : 0) * 0x001;
  498. SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 };
  499. #define PAT_CASE(sig, m, g0, g1, np) \
  500. case sig: \
  501. *mode = m; \
  502. good[0] = g0; \
  503. good[1] = g1; \
  504. *n_prefix = np; \
  505. break
  506. // Good orderings that don't require filtering
  507. *mode = RANGE;
  508. *n_prefix = 0;
  509. switch (sig) {
  510. case 0x000:
  511. if (graph_search) {
  512. *mode = RANGE;
  513. *n_prefix = 1;
  514. return DEFAULT_GRAPH_ORDER;
  515. } else {
  516. *mode = ALL;
  517. return DEFAULT_ORDER;
  518. }
  519. case 0x111:
  520. *mode = SINGLE;
  521. return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
  522. PAT_CASE(0x001, RANGE, OPS, OSP, 1);
  523. PAT_CASE(0x010, RANGE, POS, PSO, 1);
  524. PAT_CASE(0x011, RANGE, OPS, POS, 2);
  525. PAT_CASE(0x100, RANGE, SPO, SOP, 1);
  526. PAT_CASE(0x101, RANGE, SOP, OSP, 2);
  527. PAT_CASE(0x110, RANGE, SPO, PSO, 2);
  528. }
  529. if (*mode == RANGE) {
  530. if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
  531. return good[0];
  532. } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
  533. return good[1];
  534. }
  535. }
  536. // Not so good orderings that require filtering, but can
  537. // still be constrained to a range
  538. switch (sig) {
  539. PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1);
  540. PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1);
  541. PAT_CASE(0x110, FILTER_RANGE, SOP, POS, 1);
  542. default: break;
  543. }
  544. if (*mode == FILTER_RANGE) {
  545. if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
  546. return good[0];
  547. } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
  548. return good[1];
  549. }
  550. }
  551. if (graph_search) {
  552. *mode = FILTER_RANGE;
  553. *n_prefix = 1;
  554. return DEFAULT_GRAPH_ORDER;
  555. } else {
  556. *mode = FILTER_ALL;
  557. return DEFAULT_ORDER;
  558. }
  559. }
  560. SordModel*
  561. sord_new(SordWorld* world, unsigned indices, bool graphs)
  562. {
  563. SordModel* sord = (SordModel*)malloc(sizeof(struct SordModelImpl));
  564. sord->world = world;
  565. sord->n_quads = 0;
  566. sord->n_iters = 0;
  567. for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) {
  568. const int* const ordering = orderings[i];
  569. const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)];
  570. if (indices & (1 << i)) {
  571. sord->indices[i] = zix_btree_new(
  572. sord_quad_compare, (void*)ordering, NULL);
  573. if (graphs) {
  574. sord->indices[i + (NUM_ORDERS / 2)] = zix_btree_new(
  575. sord_quad_compare, (void*)g_ordering, NULL);
  576. } else {
  577. sord->indices[i + (NUM_ORDERS / 2)] = NULL;
  578. }
  579. } else {
  580. sord->indices[i] = NULL;
  581. sord->indices[i + (NUM_ORDERS / 2)] = NULL;
  582. }
  583. }
  584. if (!sord->indices[DEFAULT_ORDER]) {
  585. sord->indices[DEFAULT_ORDER] = zix_btree_new(
  586. sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL);
  587. }
  588. if (graphs && !sord->indices[DEFAULT_GRAPH_ORDER]) {
  589. sord->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new(
  590. sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL);
  591. }
  592. return sord;
  593. }
  594. static void
  595. sord_node_free_internal(SordWorld* world, SordNode* node)
  596. {
  597. assert(node->refs == 0);
  598. // Cache pointer to buffer to free after node removal and destruction
  599. const uint8_t* const buf = node->node.buf;
  600. // Remove node from hash (which frees the node)
  601. if (zix_hash_remove(world->nodes, node)) {
  602. error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n");
  603. }
  604. // Free buffer
  605. free((uint8_t*)buf);
  606. }
  607. static void
  608. sord_add_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
  609. {
  610. if (node) {
  611. assert(node->refs > 0);
  612. ++((SordNode*)node)->refs;
  613. if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
  614. ++((SordNode*)node)->meta.res.refs_as_obj;
  615. }
  616. }
  617. }
  618. static void
  619. sord_drop_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
  620. {
  621. if (!node) {
  622. return;
  623. }
  624. assert(node->refs > 0);
  625. if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
  626. assert(node->meta.res.refs_as_obj > 0);
  627. --((SordNode*)node)->meta.res.refs_as_obj;
  628. }
  629. if (--((SordNode*)node)->refs == 0) {
  630. sord_node_free_internal(sord_get_world(sord), (SordNode*)node);
  631. }
  632. }
  633. void
  634. sord_free(SordModel* sord)
  635. {
  636. if (!sord)
  637. return;
  638. // Free nodes
  639. SordQuad tup;
  640. SordIter* i = sord_begin(sord);
  641. for (; !sord_iter_end(i); sord_iter_next(i)) {
  642. sord_iter_get(i, tup);
  643. for (int t = 0; t < TUP_LEN; ++t) {
  644. sord_drop_quad_ref(sord, tup[t], (SordQuadIndex)t);
  645. }
  646. }
  647. sord_iter_free(i);
  648. // Free quads
  649. ZixBTreeIter* t = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
  650. for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) {
  651. free(zix_btree_get(t));
  652. }
  653. zix_btree_iter_free(t);
  654. // Free indices
  655. for (unsigned o = 0; o < NUM_ORDERS; ++o)
  656. if (sord->indices[o])
  657. zix_btree_free(sord->indices[o]);
  658. free(sord);
  659. }
  660. SordWorld*
  661. sord_get_world(SordModel* sord)
  662. {
  663. return sord->world;
  664. }
  665. size_t
  666. sord_num_quads(const SordModel* sord)
  667. {
  668. return sord->n_quads;
  669. }
  670. size_t
  671. sord_num_nodes(const SordWorld* world)
  672. {
  673. return zix_hash_size(world->nodes);
  674. }
  675. SordIter*
  676. sord_begin(const SordModel* sord)
  677. {
  678. if (sord_num_quads(sord) == 0) {
  679. return NULL;
  680. } else {
  681. ZixBTreeIter* cur = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
  682. SordQuad pat = { 0, 0, 0, 0 };
  683. return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0);
  684. }
  685. }
  686. SordIter*
  687. sord_find(SordModel* sord, const SordQuad pat)
  688. {
  689. if (!pat[0] && !pat[1] && !pat[2] && !pat[3])
  690. return sord_begin(sord);
  691. SearchMode mode;
  692. int n_prefix;
  693. const SordOrder index_order = sord_best_index(sord, pat, &mode, &n_prefix);
  694. SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n",
  695. TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix);
  696. if (pat[0] && pat[1] && pat[2] && pat[3])
  697. mode = SINGLE; // No duplicate quads (Sord is a set)
  698. ZixBTree* const db = sord->indices[index_order];
  699. ZixBTreeIter* cur = NULL;
  700. zix_btree_lower_bound(db, pat, &cur);
  701. if (zix_btree_iter_is_end(cur)) {
  702. SORD_FIND_LOG("No match found\n");
  703. zix_btree_iter_free(cur);
  704. return NULL;
  705. }
  706. const SordNode** const key = (const SordNode**)zix_btree_get(cur);
  707. if (!key || ( (mode == RANGE || mode == SINGLE)
  708. && !sord_quad_match_inline(pat, key) )) {
  709. SORD_FIND_LOG("No match found\n");
  710. zix_btree_iter_free(cur);
  711. return NULL;
  712. }
  713. return sord_iter_new(sord, cur, pat, index_order, mode, n_prefix);
  714. }
  715. SordIter*
  716. sord_search(SordModel* model,
  717. const SordNode* s,
  718. const SordNode* p,
  719. const SordNode* o,
  720. const SordNode* g)
  721. {
  722. SordQuad pat = { s, p, o, g };
  723. return sord_find(model, pat);
  724. }
  725. SordNode*
  726. sord_get(SordModel* model,
  727. const SordNode* s,
  728. const SordNode* p,
  729. const SordNode* o,
  730. const SordNode* g)
  731. {
  732. if ((bool)s + (bool)p + (bool)o != 2) {
  733. return NULL;
  734. }
  735. SordIter* i = sord_search(model, s, p, o, g);
  736. SordNode* ret = NULL;
  737. if (!s) {
  738. ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT));
  739. } else if (!p) {
  740. ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE));
  741. } else if (!o) {
  742. ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT));
  743. }
  744. sord_iter_free(i);
  745. return ret;
  746. }
  747. bool
  748. sord_ask(SordModel* model,
  749. const SordNode* s,
  750. const SordNode* p,
  751. const SordNode* o,
  752. const SordNode* g)
  753. {
  754. SordQuad pat = { s, p, o, g };
  755. return sord_contains(model, pat);
  756. }
  757. uint64_t
  758. sord_count(SordModel* model,
  759. const SordNode* s,
  760. const SordNode* p,
  761. const SordNode* o,
  762. const SordNode* g)
  763. {
  764. SordIter* i = sord_search(model, s, p, o, g);
  765. uint64_t n = 0;
  766. for (; !sord_iter_end(i); sord_iter_next(i)) {
  767. ++n;
  768. }
  769. sord_iter_free(i);
  770. return n;
  771. }
  772. bool
  773. sord_contains(SordModel* sord, const SordQuad pat)
  774. {
  775. SordIter* iter = sord_find(sord, pat);
  776. bool ret = (iter != NULL);
  777. sord_iter_free(iter);
  778. return ret;
  779. }
  780. static uint8_t*
  781. sord_strndup(const uint8_t* str, size_t len)
  782. {
  783. uint8_t* dup = (uint8_t*)malloc(len + 1);
  784. memcpy(dup, str, len + 1);
  785. return dup;
  786. }
  787. SordNodeType
  788. sord_node_get_type(const SordNode* node)
  789. {
  790. switch (node->node.type) {
  791. case SERD_BLANK:
  792. return SORD_BLANK;
  793. case SERD_LITERAL:
  794. return SORD_LITERAL;
  795. case SERD_URI:
  796. return SORD_URI;
  797. default:
  798. fprintf(stderr, "error: invalid node type\n");
  799. return (SordNodeType)0;
  800. }
  801. }
  802. const uint8_t*
  803. sord_node_get_string(const SordNode* node)
  804. {
  805. return node->node.buf;
  806. }
  807. const uint8_t*
  808. sord_node_get_string_counted(const SordNode* node, size_t* len)
  809. {
  810. *len = node->node.n_chars;
  811. return node->node.buf;
  812. }
  813. const char*
  814. sord_node_get_language(const SordNode* node)
  815. {
  816. if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) {
  817. return NULL;
  818. }
  819. return node->meta.lit.lang;
  820. }
  821. SordNode*
  822. sord_node_get_datatype(const SordNode* node)
  823. {
  824. return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL;
  825. }
  826. SerdNodeFlags
  827. sord_node_get_flags(const SordNode* node)
  828. {
  829. return node->node.flags;
  830. }
  831. bool
  832. sord_node_is_inline_object(const SordNode* node)
  833. {
  834. return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1);
  835. }
  836. static SordNode*
  837. sord_insert_node(SordWorld* world, const SordNode* key, bool copy)
  838. {
  839. SordNode* node = NULL;
  840. ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node);
  841. switch (st) {
  842. case ZIX_STATUS_EXISTS:
  843. ++node->refs;
  844. break;
  845. case ZIX_STATUS_SUCCESS:
  846. assert(node->refs == 1);
  847. if (copy) {
  848. node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes);
  849. }
  850. if (node->node.type == SERD_LITERAL) {
  851. node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype);
  852. }
  853. return node;
  854. default:
  855. assert(!node);
  856. error(world, SERD_ERR_INTERNAL,
  857. "error inserting node `%s'\n", key->node.buf);
  858. }
  859. if (!copy) {
  860. // Free the buffer we would have copied if a new node was created
  861. free((uint8_t*)key->node.buf);
  862. }
  863. return node;
  864. }
  865. static SordNode*
  866. sord_new_uri_counted(SordWorld* world, const uint8_t* str,
  867. size_t n_bytes, size_t n_chars, bool copy)
  868. {
  869. if (!serd_uri_string_has_scheme(str)) {
  870. error(world, SERD_ERR_BAD_ARG,
  871. "attempt to map invalid URI `%s'\n", str);
  872. return NULL; // Can't intern relative URIs
  873. }
  874. const SordNode key = {
  875. { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } }
  876. };
  877. return sord_insert_node(world, &key, copy);
  878. }
  879. SordNode*
  880. sord_new_uri(SordWorld* world, const uint8_t* str)
  881. {
  882. const SerdNode node = serd_node_from_string(SERD_URI, str);
  883. return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars, true);
  884. }
  885. SordNode*
  886. sord_new_relative_uri(SordWorld* world,
  887. const uint8_t* str,
  888. const uint8_t* base_str)
  889. {
  890. if (serd_uri_string_has_scheme(str)) {
  891. return sord_new_uri(world, str);
  892. }
  893. SerdURI buri = SERD_URI_NULL;
  894. SerdNode base = serd_node_new_uri_from_string(base_str, NULL, &buri);
  895. SerdNode node = serd_node_new_uri_from_string(str, &buri, NULL);
  896. SordNode* ret = sord_new_uri_counted(
  897. world, node.buf, node.n_bytes, node.n_chars, false);
  898. serd_node_free(&base);
  899. return ret;
  900. }
  901. static SordNode*
  902. sord_new_blank_counted(SordWorld* world, const uint8_t* str,
  903. size_t n_bytes, size_t n_chars)
  904. {
  905. const SordNode key = {
  906. { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } }
  907. };
  908. return sord_insert_node(world, &key, true);
  909. }
  910. SordNode*
  911. sord_new_blank(SordWorld* world, const uint8_t* str)
  912. {
  913. const SerdNode node = serd_node_from_string(SERD_URI, str);
  914. return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
  915. }
  916. static SordNode*
  917. sord_new_literal_counted(SordWorld* world,
  918. SordNode* datatype,
  919. const uint8_t* str,
  920. size_t n_bytes,
  921. size_t n_chars,
  922. SerdNodeFlags flags,
  923. const char* lang)
  924. {
  925. SordNode key = {
  926. { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } }
  927. };
  928. key.meta.lit.datatype = datatype;
  929. memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang));
  930. if (lang) {
  931. strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang));
  932. }
  933. return sord_insert_node(world, &key, true);
  934. }
  935. SordNode*
  936. sord_new_literal(SordWorld* world, SordNode* datatype,
  937. const uint8_t* str, const char* lang)
  938. {
  939. SerdNodeFlags flags = 0;
  940. size_t n_bytes = 0;
  941. size_t n_chars = serd_strlen(str, &n_bytes, &flags);
  942. return sord_new_literal_counted(world, datatype,
  943. str, n_bytes, n_chars, flags,
  944. lang);
  945. }
  946. SordNode*
  947. sord_node_from_serd_node(SordWorld* world,
  948. SerdEnv* env,
  949. const SerdNode* sn,
  950. const SerdNode* datatype,
  951. const SerdNode* lang)
  952. {
  953. if (!sn) {
  954. return NULL;
  955. }
  956. SordNode* datatype_node = NULL;
  957. SordNode* ret = NULL;
  958. switch (sn->type) {
  959. case SERD_NOTHING:
  960. return NULL;
  961. case SERD_LITERAL:
  962. datatype_node = sord_node_from_serd_node(
  963. world, env, datatype, NULL, NULL),
  964. ret = sord_new_literal_counted(
  965. world,
  966. datatype_node,
  967. sn->buf,
  968. sn->n_bytes,
  969. sn->n_chars,
  970. sn->flags,
  971. lang ? (const char*)lang->buf : NULL);
  972. sord_node_free(world, datatype_node);
  973. return ret;
  974. case SERD_URI:
  975. if (serd_uri_string_has_scheme(sn->buf)) {
  976. return sord_new_uri_counted(
  977. world, sn->buf, sn->n_bytes, sn->n_chars, true);
  978. } else {
  979. SerdURI base_uri;
  980. serd_env_get_base_uri(env, &base_uri);
  981. SerdURI abs_uri;
  982. SerdNode abs_uri_node = serd_node_new_uri_from_node(
  983. sn, &base_uri, &abs_uri);
  984. ret = sord_new_uri_counted(world,
  985. abs_uri_node.buf,
  986. abs_uri_node.n_bytes,
  987. abs_uri_node.n_chars,
  988. true);
  989. serd_node_free(&abs_uri_node);
  990. return ret;
  991. }
  992. case SERD_CURIE: {
  993. SerdChunk uri_prefix;
  994. SerdChunk uri_suffix;
  995. if (serd_env_expand(env, sn, &uri_prefix, &uri_suffix)) {
  996. error(world, SERD_ERR_BAD_CURIE,
  997. "failed to expand CURIE `%s'\n", sn->buf);
  998. return NULL;
  999. }
  1000. const size_t uri_len = uri_prefix.len + uri_suffix.len;
  1001. uint8_t* buf = (uint8_t*)malloc(uri_len + 1);
  1002. memcpy(buf, uri_prefix.buf, uri_prefix.len);
  1003. memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
  1004. buf[uri_len] = '\0';
  1005. ret = sord_new_uri_counted(
  1006. world, buf, uri_prefix.len + uri_suffix.len,
  1007. uri_prefix.len + uri_suffix.len, false); // FIXME: UTF-8
  1008. return ret;
  1009. }
  1010. case SERD_BLANK:
  1011. return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars);
  1012. }
  1013. return NULL;
  1014. }
  1015. const SerdNode*
  1016. sord_node_to_serd_node(const SordNode* node)
  1017. {
  1018. return node ? &node->node : &SERD_NODE_NULL;
  1019. }
  1020. void
  1021. sord_node_free(SordWorld* world, SordNode* node)
  1022. {
  1023. if (!node) {
  1024. return;
  1025. }
  1026. assert(node->refs > 0);
  1027. if (--node->refs == 0) {
  1028. sord_node_free_internal(world, node);
  1029. }
  1030. }
  1031. SordNode*
  1032. sord_node_copy(const SordNode* node)
  1033. {
  1034. SordNode* copy = (SordNode*)node;
  1035. if (copy) {
  1036. ++copy->refs;
  1037. }
  1038. return copy;
  1039. }
  1040. static inline bool
  1041. sord_add_to_index(SordModel* sord, const SordNode** tup, SordOrder order)
  1042. {
  1043. return !zix_btree_insert(sord->indices[order], tup);
  1044. }
  1045. bool
  1046. sord_add(SordModel* sord, const SordQuad tup)
  1047. {
  1048. SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
  1049. if (!tup[0] || !tup[1] || !tup[2]) {
  1050. error(sord->world, SERD_ERR_BAD_ARG,
  1051. "attempt to add quad with NULL field\n");
  1052. return false;
  1053. } else if (sord->n_iters > 0) {
  1054. error(sord->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n");
  1055. }
  1056. const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad));
  1057. memcpy(quad, tup, sizeof(SordQuad));
  1058. for (unsigned i = 0; i < NUM_ORDERS; ++i) {
  1059. if (sord->indices[i]) {
  1060. if (!sord_add_to_index(sord, quad, (SordOrder)i)) {
  1061. assert(i == 0); // Assuming index coherency
  1062. free(quad);
  1063. return false; // Quad already stored, do nothing
  1064. }
  1065. }
  1066. }
  1067. for (int i = 0; i < TUP_LEN; ++i)
  1068. sord_add_quad_ref(sord, tup[i], (SordQuadIndex)i);
  1069. ++sord->n_quads;
  1070. return true;
  1071. }
  1072. void
  1073. sord_remove(SordModel* sord, const SordQuad tup)
  1074. {
  1075. SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
  1076. if (sord->n_iters > 0) {
  1077. error(sord->world, SERD_ERR_BAD_ARG, "remove with iterator\n");
  1078. }
  1079. SordNode* quad = NULL;
  1080. for (unsigned i = 0; i < NUM_ORDERS; ++i) {
  1081. if (sord->indices[i]) {
  1082. if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, NULL)) {
  1083. assert(i == 0); // Assuming index coherency
  1084. return; // Quad not found, do nothing
  1085. }
  1086. }
  1087. }
  1088. free(quad);
  1089. for (int i = 0; i < TUP_LEN; ++i)
  1090. sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
  1091. --sord->n_quads;
  1092. }
  1093. SerdStatus
  1094. sord_erase(SordModel* sord, SordIter* iter)
  1095. {
  1096. if (sord->n_iters > 1) {
  1097. error(sord->world, SERD_ERR_BAD_ARG, "erased with many iterators\n");
  1098. }
  1099. SordQuad tup;
  1100. sord_iter_get(iter, tup);
  1101. SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
  1102. SordNode* quad = NULL;
  1103. for (unsigned i = 0; i < NUM_ORDERS; ++i) {
  1104. if (sord->indices[i]) {
  1105. if (zix_btree_remove(sord->indices[i], tup, (void**)&quad,
  1106. i == iter->order ? &iter->cur : NULL)) {
  1107. return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL;
  1108. }
  1109. }
  1110. }
  1111. iter->end = zix_btree_iter_is_end(iter->cur);
  1112. free(quad);
  1113. for (int i = 0; i < TUP_LEN; ++i)
  1114. sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
  1115. --sord->n_quads;
  1116. return SERD_SUCCESS;
  1117. }