Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

955 lines
26KB

  1. /*
  2. Copyright 2011-2016 David Robillard <http://drobilla.net>
  3. Permission to use, copy, modify, and/or distribute this software for any
  4. purpose with or without fee is hereby granted, provided that the above
  5. copyright notice and this permission notice appear in all copies.
  6. THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  7. WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  8. MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  9. ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  10. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  11. ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  12. OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  13. */
  14. #include "serd_internal.h"
  15. #include <assert.h>
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <string.h>
  19. #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  20. #define NS_XSD "http://www.w3.org/2001/XMLSchema#"
  21. typedef struct {
  22. SerdNode graph;
  23. SerdNode subject;
  24. SerdNode predicate;
  25. } WriteContext;
  26. static const WriteContext WRITE_CONTEXT_NULL = {
  27. { 0, 0, 0, 0, SERD_NOTHING },
  28. { 0, 0, 0, 0, SERD_NOTHING },
  29. { 0, 0, 0, 0, SERD_NOTHING }
  30. };
  /**
     Kinds of separator the writer can emit between nodes.

     The numeric value of each constant is used as an index into `rules`.
  */
  typedef enum {
      SEP_NONE,        ///< No separator (the last thing written was a node)
      SEP_END_S,       ///< End of a subject ('.')
      SEP_END_P,       ///< End of a predicate (';')
      SEP_END_O,       ///< End of an object (',')
      SEP_S_P,         ///< Between a subject and predicate (whitespace)
      SEP_P_O,         ///< Between a predicate and object (whitespace)
      SEP_ANON_BEGIN,  ///< Start of anonymous node ('[')
      SEP_ANON_END,    ///< End of anonymous node (']')
      SEP_LIST_BEGIN,  ///< Start of list ('(')
      SEP_LIST_SEP,    ///< List separator (whitespace)
      SEP_LIST_END     ///< End of list (')')
  } Sep;
  /**
     Formatting rule for one separator, as interpreted by write_sep().
  */
  typedef struct {
      const char* str;               ///< Separator text, or NULL to write none
      uint8_t     len;               ///< Number of bytes of `str` to write
      uint8_t     space_before;      ///< Write a newline before the separator
      uint8_t     space_after_node;  ///< Break after it when it follows a node
      uint8_t     space_after_sep;   ///< Newline after it when it follows a sep
  } SepRule;
  51. static const SepRule rules[] = {
  52. { NULL, 0, 0, 0, 0 },
  53. { " .\n\n", 4, 0, 0, 0 },
  54. { " ;", 2, 0, 1, 1 },
  55. { " ,", 2, 0, 1, 0 },
  56. { NULL, 0, 0, 1, 0 },
  57. { " ", 1, 0, 0, 0 },
  58. { "[", 1, 0, 1, 1 },
  59. { "]", 1, 1, 0, 0 },
  60. { "(", 1, 0, 0, 0 },
  61. { NULL, 1, 0, 1, 0 },
  62. { ")", 1, 1, 0, 0 },
  63. { "\n", 1, 0, 1, 0 }
  64. };
  65. struct SerdWriterImpl {
  66. SerdSyntax syntax;
  67. SerdStyle style;
  68. SerdEnv* env;
  69. SerdNode root_node;
  70. SerdURI root_uri;
  71. SerdURI base_uri;
  72. SerdStack anon_stack;
  73. SerdBulkSink bulk_sink;
  74. SerdSink sink;
  75. void* stream;
  76. SerdErrorSink error_sink;
  77. void* error_handle;
  78. WriteContext context;
  79. SerdNode list_subj;
  80. unsigned list_depth;
  81. uint8_t* bprefix;
  82. size_t bprefix_len;
  83. unsigned indent;
  84. Sep last_sep;
  85. bool empty;
  86. };
  /** Quoting form that write_text() escapes for. */
  typedef enum {
      WRITE_STRING,      ///< Text inside a single-quoted ("...") literal
      WRITE_LONG_STRING  ///< Text inside a triple-quoted ("""...""") literal
  } TextContext;
  91. static void
  92. w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
  93. {
  94. /* TODO: This results in errors with no file information, which is not
  95. helpful when re-serializing a file (particularly for "undefined
  96. namespace prefix" errors. The statement sink API needs to be changed to
  97. add a Cursor parameter so the source can notify the writer of the
  98. statement origin for better error reporting. */
  99. va_list args;
  100. va_start(args, fmt);
  101. const SerdError e = { st, NULL, 0, 0, fmt, &args };
  102. serd_error(writer->error_sink, writer->error_handle, &e);
  103. va_end(args);
  104. }
  105. static inline WriteContext*
  106. anon_stack_top(SerdWriter* writer)
  107. {
  108. assert(!serd_stack_is_empty(&writer->anon_stack));
  109. return (WriteContext*)(writer->anon_stack.buf
  110. + writer->anon_stack.size - sizeof(WriteContext));
  111. }
  112. static void
  113. copy_node(SerdNode* dst, const SerdNode* src)
  114. {
  115. if (src) {
  116. dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
  117. dst->n_bytes = src->n_bytes;
  118. dst->n_chars = src->n_chars;
  119. dst->flags = src->flags;
  120. dst->type = src->type;
  121. memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
  122. } else {
  123. dst->type = SERD_NOTHING;
  124. }
  125. }
  126. static inline size_t
  127. sink(const void* buf, size_t len, SerdWriter* writer)
  128. {
  129. if (len == 0) {
  130. return 0;
  131. } else if (writer->style & SERD_STYLE_BULK) {
  132. return serd_bulk_sink_write(buf, len, &writer->bulk_sink);
  133. } else {
  134. return writer->sink(buf, len, writer->stream);
  135. }
  136. }
  137. // Parse a UTF-8 character, set *size to the length, and return the code point
  138. static inline uint32_t
  139. parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
  140. {
  141. uint32_t c = 0;
  142. if ((utf8[0] & 0x80) == 0) { // Starts with `0'
  143. *size = 1;
  144. c = utf8[0];
  145. } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
  146. *size = 2;
  147. c = utf8[0] & 0x1F;
  148. } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
  149. *size = 3;
  150. c = utf8[0] & 0x0F;
  151. } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
  152. *size = 4;
  153. c = utf8[0] & 0x07;
  154. } else {
  155. w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
  156. *size = 0;
  157. return 0;
  158. }
  159. size_t i = 0;
  160. uint8_t in = utf8[i++];
  161. #define READ_BYTE() \
  162. in = utf8[i++] & 0x3F; \
  163. c = (c << 6) | in;
  164. switch (*size) {
  165. case 4: READ_BYTE();
  166. case 3: READ_BYTE();
  167. case 2: READ_BYTE();
  168. }
  169. return c;
  170. }
  171. // Write a single character, as an escape for single byte characters
  172. // (Caller prints any single byte characters that don't need escaping)
  173. static size_t
  174. write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
  175. {
  176. const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
  177. char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  178. const uint8_t in = utf8[0];
  179. uint32_t c = parse_utf8_char(writer, utf8, size);
  180. switch (*size) {
  181. case 0:
  182. w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in);
  183. return sink(replacement_char, sizeof(replacement_char), writer);
  184. case 1:
  185. snprintf(escape, sizeof(escape), "\\u%04X", in);
  186. return sink(escape, 6, writer);
  187. default:
  188. break;
  189. }
  190. if (!(writer->style & SERD_STYLE_ASCII)) {
  191. // Write UTF-8 character directly to UTF-8 output
  192. return sink(utf8, *size, writer);
  193. }
  194. if (c <= 0xFFFF) {
  195. snprintf(escape, sizeof(escape), "\\u%04X", c);
  196. return sink(escape, 6, writer);
  197. } else {
  198. snprintf(escape, sizeof(escape), "\\U%08X", c);
  199. return sink(escape, 10, writer);
  200. }
  201. }
  /**
     Return true if byte `c` can not be written literally inside a URI.

     This covers an explicit set of ASCII characters, plus every byte that
     in_range() reports as outside 0x20..0x7E.
  */
  static inline bool
  uri_must_escape(const uint8_t c)
  {
      static const char forbidden[] = " \"<>\\^`{|}";

      // The terminator of `forbidden` is excluded from the search
      if (memchr(forbidden, c, sizeof(forbidden) - 1)) {
          return true;
      }

      return !in_range(c, 0x20, 0x7E);
  }
  213. static size_t
  214. write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
  215. {
  216. size_t len = 0;
  217. for (size_t i = 0; i < n_bytes;) {
  218. size_t j = i; // Index of next character that must be escaped
  219. for (; j < n_bytes; ++j) {
  220. if (uri_must_escape(utf8[j])) {
  221. break;
  222. }
  223. }
  224. // Bulk write all characters up to this special one
  225. len += sink(&utf8[i], j - i, writer);
  226. if ((i = j) == n_bytes) {
  227. break; // Reached end
  228. }
  229. // Write UTF-8 character
  230. size_t size = 0;
  231. len += write_character(writer, utf8 + i, &size);
  232. i += size;
  233. }
  234. return len;
  235. }
  /**
     Return true if `c` must be preceded by a backslash in a local name.

     This arbitrary list of characters, most of which have nothing to do with
     Turtle, must be handled as special cases here because the RDF and SPARQL
     WGs are apparently intent on making the once elegant Turtle a baroque
     and inconsistent mess, throwing elegance and extensibility completely
     out the window for no good reason.

     Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
     in local names, so they are not escaped here.
  */
  static bool
  lname_must_escape(const uint8_t c)
  {
      static const char escaped[] = "'!#$%&()*+,/;=?@~";

      // Search excludes the terminator, so a null byte is never a match
      return memchr(escaped, c, sizeof(escaped) - 1) != NULL;
  }
  254. static size_t
  255. write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
  256. {
  257. size_t len = 0;
  258. for (size_t i = 0; i < n_bytes; ++i) {
  259. size_t j = i; // Index of next character that must be escaped
  260. for (; j < n_bytes; ++j) {
  261. if (lname_must_escape(utf8[j])) {
  262. break;
  263. }
  264. }
  265. // Bulk write all characters up to this special one
  266. len += sink(&utf8[i], j - i, writer);
  267. if ((i = j) == n_bytes) {
  268. break; // Reached end
  269. }
  270. // Write escape
  271. len += sink("\\", 1, writer);
  272. len += sink(&utf8[i], 1, writer);
  273. }
  274. return len;
  275. }
  276. static size_t
  277. write_text(SerdWriter* writer, TextContext ctx,
  278. const uint8_t* utf8, size_t n_bytes)
  279. {
  280. size_t len = 0;
  281. for (size_t i = 0; i < n_bytes;) {
  282. // Fast bulk write for long strings of printable ASCII
  283. size_t j = i;
  284. for (; j < n_bytes; ++j) {
  285. if (utf8[j] == '\\' || utf8[j] == '"'
  286. || (!in_range(utf8[j], 0x20, 0x7E))) {
  287. break;
  288. }
  289. }
  290. len += sink(&utf8[i], j - i, writer);
  291. if ((i = j) == n_bytes) {
  292. break; // Reached end
  293. }
  294. uint8_t in = utf8[i++];
  295. if (ctx == WRITE_LONG_STRING) {
  296. switch (in) {
  297. case '\\': len += sink("\\\\", 2, writer); continue;
  298. case '\b': len += sink("\\b", 2, writer); continue;
  299. case '\n': case '\r': case '\t': case '\f':
  300. len += sink(&in, 1, writer); // Write character as-is
  301. continue;
  302. case '\"':
  303. if (i == n_bytes) { // '"' at string end
  304. len += sink("\\\"", 2, writer);
  305. } else {
  306. len += sink(&in, 1, writer);
  307. }
  308. continue;
  309. default: break;
  310. }
  311. } else if (ctx == WRITE_STRING) {
  312. switch (in) {
  313. case '\\': len += sink("\\\\", 2, writer); continue;
  314. case '\n': len += sink("\\n", 2, writer); continue;
  315. case '\r': len += sink("\\r", 2, writer); continue;
  316. case '\t': len += sink("\\t", 2, writer); continue;
  317. case '"': len += sink("\\\"", 2, writer); continue;
  318. default: break;
  319. }
  320. if (writer->syntax != SERD_NTRIPLES) {
  321. switch (in) {
  322. case '\b': len += sink("\\b", 2, writer); continue;
  323. case '\f': len += sink("\\f", 2, writer); continue;
  324. }
  325. }
  326. }
  327. size_t size = 0;
  328. len += write_character(writer, utf8 + i - 1, &size);
  329. if (size == 0) {
  330. return len;
  331. }
  332. i += size - 1;
  333. }
  334. return len;
  335. }
  336. static size_t
  337. uri_sink(const void* buf, size_t len, void* stream)
  338. {
  339. return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
  340. }
  341. static void
  342. write_newline(SerdWriter* writer)
  343. {
  344. sink("\n", 1, writer);
  345. for (unsigned i = 0; i < writer->indent; ++i) {
  346. sink("\t", 1, writer);
  347. }
  348. }
  349. static void
  350. write_sep(SerdWriter* writer, const Sep sep)
  351. {
  352. const SepRule* rule = &rules[sep];
  353. if (rule->space_before) {
  354. write_newline(writer);
  355. }
  356. if (rule->str) {
  357. sink(rule->str, rule->len, writer);
  358. }
  359. if ( (writer->last_sep && rule->space_after_sep)
  360. || (!writer->last_sep && rule->space_after_node)) {
  361. write_newline(writer);
  362. } else if (writer->last_sep && rule->space_after_node) {
  363. sink(" ", 1, writer);
  364. }
  365. writer->last_sep = sep;
  366. }
  367. static SerdStatus
  368. reset_context(SerdWriter* writer, bool del)
  369. {
  370. if (del) {
  371. serd_node_free(&writer->context.graph);
  372. serd_node_free(&writer->context.subject);
  373. serd_node_free(&writer->context.predicate);
  374. writer->context = WRITE_CONTEXT_NULL;
  375. } else {
  376. writer->context.graph.type = SERD_NOTHING;
  377. writer->context.subject.type = SERD_NOTHING;
  378. writer->context.predicate.type = SERD_NOTHING;
  379. }
  380. writer->empty = false;
  381. return SERD_SUCCESS;
  382. }
  /** The position within a statement that a node is being written for. */
  typedef enum {
      FIELD_NONE,       ///< Not a statement field (used for literal datatypes)
      FIELD_SUBJECT,    ///< Statement subject
      FIELD_PREDICATE,  ///< Statement predicate
      FIELD_OBJECT      ///< Statement object
  } Field;
  389. static bool
  390. is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
  391. {
  392. return (writer->syntax != SERD_NTRIPLES &&
  393. ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
  394. (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
  395. }
  396. static bool
  397. write_node(SerdWriter* writer,
  398. const SerdNode* node,
  399. const SerdNode* datatype,
  400. const SerdNode* lang,
  401. Field field,
  402. SerdStatementFlags flags)
  403. {
  404. SerdChunk uri_prefix;
  405. SerdNode prefix;
  406. SerdChunk suffix;
  407. bool has_scheme;
  408. switch (node->type) {
  409. case SERD_BLANK:
  410. if (is_inline_start(writer, field, flags)) {
  411. ++writer->indent;
  412. write_sep(writer, SEP_ANON_BEGIN);
  413. } else if (writer->syntax != SERD_NTRIPLES
  414. && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
  415. assert(writer->list_depth == 0);
  416. copy_node(&writer->list_subj, node);
  417. ++writer->list_depth;
  418. ++writer->indent;
  419. write_sep(writer, SEP_LIST_BEGIN);
  420. } else if (writer->syntax != SERD_NTRIPLES
  421. && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
  422. ++writer->indent;
  423. ++writer->list_depth;
  424. write_sep(writer, SEP_LIST_BEGIN);
  425. } else if (writer->syntax != SERD_NTRIPLES
  426. && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
  427. || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
  428. sink("[]", 2, writer);
  429. } else {
  430. sink("_:", 2, writer);
  431. if (writer->bprefix && !strncmp((const char*)node->buf,
  432. (const char*)writer->bprefix,
  433. writer->bprefix_len)) {
  434. sink(node->buf + writer->bprefix_len,
  435. node->n_bytes - writer->bprefix_len,
  436. writer);
  437. } else {
  438. sink(node->buf, node->n_bytes, writer);
  439. }
  440. }
  441. break;
  442. case SERD_CURIE:
  443. switch (writer->syntax) {
  444. case SERD_NTRIPLES:
  445. if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) {
  446. w_err(writer, SERD_ERR_BAD_CURIE,
  447. "undefined namespace prefix `%s'\n", node->buf);
  448. return false;
  449. }
  450. sink("<", 1, writer);
  451. write_uri(writer, uri_prefix.buf, uri_prefix.len);
  452. write_uri(writer, suffix.buf, suffix.len);
  453. sink(">", 1, writer);
  454. break;
  455. case SERD_TURTLE:
  456. if (is_inline_start(writer, field, flags)) {
  457. ++writer->indent;
  458. write_sep(writer, SEP_ANON_BEGIN);
  459. sink("== ", 3, writer);
  460. }
  461. write_lname(writer, node->buf, node->n_bytes);
  462. if (is_inline_start(writer, field, flags)) {
  463. sink(" ;", 2, writer);
  464. write_newline(writer);
  465. }
  466. }
  467. break;
  468. case SERD_LITERAL:
  469. if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) {
  470. const char* type_uri = (const char*)datatype->buf;
  471. if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
  472. !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
  473. !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
  474. sink(node->buf, node->n_bytes, writer);
  475. break;
  476. } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
  477. !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
  478. strchr((const char*)node->buf, '.') &&
  479. node->buf[node->n_bytes - 1] != '.') {
  480. /* xsd:decimal literals without trailing digits, e.g. "5.", can
  481. not be written bare in Turtle. We could add a 0 which is
  482. prettier, but changes the text and breaks round tripping.
  483. */
  484. sink(node->buf, node->n_bytes, writer);
  485. break;
  486. }
  487. }
  488. if (writer->syntax != SERD_NTRIPLES
  489. && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
  490. sink("\"\"\"", 3, writer);
  491. write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
  492. sink("\"\"\"", 3, writer);
  493. } else {
  494. sink("\"", 1, writer);
  495. write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
  496. sink("\"", 1, writer);
  497. }
  498. if (lang && lang->buf) {
  499. sink("@", 1, writer);
  500. sink(lang->buf, lang->n_bytes, writer);
  501. } else if (datatype && datatype->buf) {
  502. sink("^^", 2, writer);
  503. write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
  504. }
  505. break;
  506. case SERD_URI:
  507. if (is_inline_start(writer, field, flags)) {
  508. ++writer->indent;
  509. write_sep(writer, SEP_ANON_BEGIN);
  510. sink("== ", 3, writer);
  511. }
  512. has_scheme = serd_uri_string_has_scheme(node->buf);
  513. if (field == FIELD_PREDICATE && (writer->syntax == SERD_TURTLE)
  514. && !strcmp((const char*)node->buf, NS_RDF "type")) {
  515. sink("a", 1, writer);
  516. break;
  517. } else if ((writer->syntax == SERD_TURTLE)
  518. && !strcmp((const char*)node->buf, NS_RDF "nil")) {
  519. sink("()", 2, writer);
  520. break;
  521. } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
  522. serd_env_qualify(writer->env, node, &prefix, &suffix)) {
  523. write_uri(writer, prefix.buf, prefix.n_bytes);
  524. sink(":", 1, writer);
  525. write_uri(writer, suffix.buf, suffix.len);
  526. break;
  527. }
  528. sink("<", 1, writer);
  529. if (writer->style & SERD_STYLE_RESOLVED) {
  530. SerdURI in_base_uri, uri, abs_uri;
  531. serd_env_get_base_uri(writer->env, &in_base_uri);
  532. serd_uri_parse(node->buf, &uri);
  533. serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
  534. bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
  535. SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
  536. if (!uri_is_under(&abs_uri, root) ||
  537. writer->syntax == SERD_NTRIPLES) {
  538. serd_uri_serialise(&abs_uri, uri_sink, writer);
  539. } else {
  540. serd_uri_serialise_relative(
  541. &uri, &writer->base_uri, root, uri_sink, writer);
  542. }
  543. } else {
  544. write_uri(writer, node->buf, node->n_bytes);
  545. }
  546. sink(">", 1, writer);
  547. if (is_inline_start(writer, field, flags)) {
  548. sink(" ;", 2, writer);
  549. write_newline(writer);
  550. }
  551. default:
  552. break;
  553. }
  554. writer->last_sep = SEP_NONE;
  555. return true;
  556. }
  557. static inline bool
  558. is_resource(const SerdNode* node)
  559. {
  560. return node->type > SERD_LITERAL;
  561. }
  562. static void
  563. write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
  564. {
  565. write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
  566. write_sep(writer, SEP_P_O);
  567. copy_node(&writer->context.predicate, pred);
  568. }
  569. static bool
  570. write_list_obj(SerdWriter* writer,
  571. SerdStatementFlags flags,
  572. const SerdNode* predicate,
  573. const SerdNode* object,
  574. const SerdNode* datatype,
  575. const SerdNode* lang)
  576. {
  577. if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
  578. --writer->indent;
  579. write_sep(writer, SEP_LIST_END);
  580. return true;
  581. } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
  582. write_sep(writer, SEP_LIST_SEP);
  583. write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
  584. }
  585. return false;
  586. }
  587. SERD_API
  588. SerdStatus
  589. serd_writer_write_statement(SerdWriter* writer,
  590. SerdStatementFlags flags,
  591. const SerdNode* graph,
  592. const SerdNode* subject,
  593. const SerdNode* predicate,
  594. const SerdNode* object,
  595. const SerdNode* datatype,
  596. const SerdNode* lang)
  597. {
  598. if (!subject || !predicate || !object
  599. || !subject->buf || !predicate->buf || !object->buf
  600. || !is_resource(subject) || !is_resource(predicate)) {
  601. return SERD_ERR_BAD_ARG;
  602. }
  603. #define TRY(write_result) \
  604. if (!write_result) { \
  605. return SERD_ERR_UNKNOWN; \
  606. }
  607. switch (writer->syntax) {
  608. case SERD_NTRIPLES:
  609. TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
  610. sink(" ", 1, writer);
  611. TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
  612. sink(" ", 1, writer);
  613. TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
  614. sink(" .\n", 3, writer);
  615. return SERD_SUCCESS;
  616. default:
  617. break;
  618. }
  619. if ((flags & SERD_LIST_CONT)) {
  620. if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
  621. // Reached end of list
  622. if (--writer->list_depth == 0 && writer->list_subj.type) {
  623. reset_context(writer, true);
  624. writer->context.subject = writer->list_subj;
  625. writer->list_subj = SERD_NODE_NULL;
  626. }
  627. return SERD_SUCCESS;
  628. }
  629. } else if (serd_node_equals(subject, &writer->context.subject)) {
  630. if (serd_node_equals(predicate, &writer->context.predicate)) {
  631. // Abbreviate S P
  632. if (!(flags & SERD_ANON_O_BEGIN)) {
  633. ++writer->indent;
  634. }
  635. write_sep(writer, SEP_END_O);
  636. write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
  637. if (!(flags & SERD_ANON_O_BEGIN)) {
  638. --writer->indent;
  639. }
  640. } else {
  641. // Abbreviate S
  642. Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
  643. write_sep(writer, sep);
  644. write_pred(writer, flags, predicate);
  645. write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
  646. }
  647. } else {
  648. // No abbreviation
  649. if (writer->context.subject.type) {
  650. assert(writer->indent > 0);
  651. --writer->indent;
  652. if (serd_stack_is_empty(&writer->anon_stack)) {
  653. write_sep(writer, SEP_END_S);
  654. }
  655. } else if (!writer->empty) {
  656. write_sep(writer, SEP_S_P);
  657. }
  658. if (!(flags & SERD_ANON_CONT)) {
  659. write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
  660. ++writer->indent;
  661. write_sep(writer, SEP_S_P);
  662. } else {
  663. ++writer->indent;
  664. }
  665. reset_context(writer, true);
  666. copy_node(&writer->context.subject, subject);
  667. if (!(flags & SERD_LIST_S_BEGIN)) {
  668. write_pred(writer, flags, predicate);
  669. }
  670. write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
  671. }
  672. if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
  673. WriteContext* ctx = (WriteContext*)serd_stack_push(
  674. &writer->anon_stack, sizeof(WriteContext));
  675. *ctx = writer->context;
  676. WriteContext new_context = {
  677. serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
  678. if ((flags & SERD_ANON_S_BEGIN)) {
  679. new_context.predicate = serd_node_copy(predicate);
  680. }
  681. writer->context = new_context;
  682. } else {
  683. copy_node(&writer->context.graph, graph);
  684. copy_node(&writer->context.subject, subject);
  685. copy_node(&writer->context.predicate, predicate);
  686. }
  687. return SERD_SUCCESS;
  688. }
  689. SERD_API
  690. SerdStatus
  691. serd_writer_end_anon(SerdWriter* writer,
  692. const SerdNode* node)
  693. {
  694. if (writer->syntax == SERD_NTRIPLES) {
  695. return SERD_SUCCESS;
  696. }
  697. if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
  698. w_err(writer, SERD_ERR_UNKNOWN,
  699. "unexpected end of anonymous node\n");
  700. return SERD_ERR_UNKNOWN;
  701. }
  702. --writer->indent;
  703. write_sep(writer, SEP_ANON_END);
  704. reset_context(writer, true);
  705. writer->context = *anon_stack_top(writer);
  706. serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
  707. const bool is_subject = serd_node_equals(node, &writer->context.subject);
  708. if (is_subject) {
  709. copy_node(&writer->context.subject, node);
  710. writer->context.predicate.type = SERD_NOTHING;
  711. }
  712. return SERD_SUCCESS;
  713. }
  714. SERD_API
  715. SerdStatus
  716. serd_writer_finish(SerdWriter* writer)
  717. {
  718. if (writer->context.subject.type) {
  719. sink(" .\n", 3, writer);
  720. }
  721. if (writer->style & SERD_STYLE_BULK) {
  722. serd_bulk_sink_flush(&writer->bulk_sink);
  723. }
  724. writer->indent = 0;
  725. return reset_context(writer, true);
  726. }
  727. SERD_API
  728. SerdWriter*
  729. serd_writer_new(SerdSyntax syntax,
  730. SerdStyle style,
  731. SerdEnv* env,
  732. const SerdURI* base_uri,
  733. SerdSink ssink,
  734. void* stream)
  735. {
  736. const WriteContext context = WRITE_CONTEXT_NULL;
  737. SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
  738. writer->syntax = syntax;
  739. writer->style = style;
  740. writer->env = env;
  741. writer->root_node = SERD_NODE_NULL;
  742. writer->root_uri = SERD_URI_NULL;
  743. writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
  744. writer->anon_stack = serd_stack_new(sizeof(WriteContext));
  745. writer->sink = ssink;
  746. writer->stream = stream;
  747. writer->context = context;
  748. writer->list_subj = SERD_NODE_NULL;
  749. writer->empty = true;
  750. if (style & SERD_STYLE_BULK) {
  751. writer->bulk_sink = serd_bulk_sink_new(ssink, stream, SERD_PAGE_SIZE);
  752. }
  753. return writer;
  754. }
  755. SERD_API
  756. void
  757. serd_writer_set_error_sink(SerdWriter* writer,
  758. SerdErrorSink error_sink,
  759. void* error_handle)
  760. {
  761. writer->error_sink = error_sink;
  762. writer->error_handle = error_handle;
  763. }
  764. SERD_API
  765. void
  766. serd_writer_chop_blank_prefix(SerdWriter* writer,
  767. const uint8_t* prefix)
  768. {
  769. free(writer->bprefix);
  770. writer->bprefix_len = 0;
  771. writer->bprefix = NULL;
  772. if (prefix) {
  773. writer->bprefix_len = strlen((const char*)prefix);
  774. writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1);
  775. memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
  776. }
  777. }
  778. SERD_API
  779. SerdStatus
  780. serd_writer_set_base_uri(SerdWriter* writer,
  781. const SerdNode* uri)
  782. {
  783. if (!serd_env_set_base_uri(writer->env, uri)) {
  784. serd_env_get_base_uri(writer->env, &writer->base_uri);
  785. if (writer->syntax != SERD_NTRIPLES) {
  786. if (writer->context.graph.type || writer->context.subject.type) {
  787. sink(" .\n\n", 4, writer);
  788. reset_context(writer, false);
  789. }
  790. sink("@base <", 7, writer);
  791. sink(uri->buf, uri->n_bytes, writer);
  792. sink("> .\n", 4, writer);
  793. }
  794. writer->indent = 0;
  795. return reset_context(writer, false);
  796. }
  797. return SERD_ERR_UNKNOWN;
  798. }
  799. SERD_API
  800. SerdStatus
  801. serd_writer_set_root_uri(SerdWriter* writer,
  802. const SerdNode* uri)
  803. {
  804. serd_node_free(&writer->root_node);
  805. if (uri && uri->buf) {
  806. writer->root_node = serd_node_copy(uri);
  807. serd_uri_parse(uri->buf, &writer->root_uri);
  808. } else {
  809. writer->root_node = SERD_NODE_NULL;
  810. writer->root_uri = SERD_URI_NULL;
  811. }
  812. return SERD_SUCCESS;
  813. }
  814. SERD_API
  815. SerdStatus
  816. serd_writer_set_prefix(SerdWriter* writer,
  817. const SerdNode* name,
  818. const SerdNode* uri)
  819. {
  820. if (!serd_env_set_prefix(writer->env, name, uri)) {
  821. if (writer->syntax != SERD_NTRIPLES) {
  822. if (writer->context.graph.type || writer->context.subject.type) {
  823. sink(" .\n\n", 4, writer);
  824. reset_context(writer, false);
  825. }
  826. sink("@prefix ", 8, writer);
  827. sink(name->buf, name->n_bytes, writer);
  828. sink(": <", 3, writer);
  829. write_uri(writer, uri->buf, uri->n_bytes);
  830. sink("> .\n", 4, writer);
  831. }
  832. writer->indent = 0;
  833. return reset_context(writer, false);
  834. }
  835. return SERD_ERR_UNKNOWN;
  836. }
  837. SERD_API
  838. void
  839. serd_writer_free(SerdWriter* writer)
  840. {
  841. serd_writer_finish(writer);
  842. serd_stack_free(&writer->anon_stack);
  843. free(writer->bprefix);
  844. if (writer->style & SERD_STYLE_BULK) {
  845. serd_bulk_sink_free(&writer->bulk_sink);
  846. }
  847. serd_node_free(&writer->root_node);
  848. free(writer);
  849. }
  850. SERD_API
  851. SerdEnv*
  852. serd_writer_get_env(SerdWriter* writer)
  853. {
  854. return writer->env;
  855. }
  856. SERD_API
  857. size_t
  858. serd_file_sink(const void* buf, size_t len, void* stream)
  859. {
  860. return fwrite(buf, 1, len, (FILE*)stream);
  861. }
  862. SERD_API
  863. size_t
  864. serd_chunk_sink(const void* buf, size_t len, void* stream)
  865. {
  866. SerdChunk* chunk = (SerdChunk*)stream;
  867. chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
  868. memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
  869. chunk->len += len;
  870. return len;
  871. }
  872. SERD_API
  873. uint8_t*
  874. serd_chunk_sink_finish(SerdChunk* stream)
  875. {
  876. serd_chunk_sink("", 1, stream);
  877. return (uint8_t*)stream->buf;
  878. }