|
- /*
- Copyright 2011-2016 David Robillard <http://drobilla.net>
-
- Permission to use, copy, modify, and/or distribute this software for any
- purpose with or without fee is hereby granted, provided that the above
- copyright notice and this permission notice appear in all copies.
-
- THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
- #include "serd_internal.h"
-
- #include <assert.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- #define NS_XSD "http://www.w3.org/2001/XMLSchema#"
-
- typedef struct {
- SerdNode graph;
- SerdNode subject;
- SerdNode predicate;
- } WriteContext;
-
- static const WriteContext WRITE_CONTEXT_NULL = {
- { 0, 0, 0, 0, SERD_NOTHING },
- { 0, 0, 0, 0, SERD_NOTHING },
- { 0, 0, 0, 0, SERD_NOTHING }
- };
-
- typedef enum {
- SEP_NONE,
- SEP_END_S, ///< End of a subject ('.')
- SEP_END_P, ///< End of a predicate (';')
- SEP_END_O, ///< End of an object (',')
- SEP_S_P, ///< Between a subject and predicate (whitespace)
- SEP_P_O, ///< Between a predicate and object (whitespace)
- SEP_ANON_BEGIN, ///< Start of anonymous node ('[')
- SEP_ANON_END, ///< End of anonymous node (']')
- SEP_LIST_BEGIN, ///< Start of list ('(')
- SEP_LIST_SEP, ///< List separator (whitespace)
- SEP_LIST_END ///< End of list (')')
- } Sep;
-
- typedef struct {
- const char* str; ///< Sep string
- uint8_t len; ///< Length of sep string
- uint8_t space_before; ///< Newline before sep
- uint8_t space_after_node; ///< Newline after sep if after node
- uint8_t space_after_sep; ///< Newline after sep if after sep
- } SepRule;
-
- static const SepRule rules[] = {
- { NULL, 0, 0, 0, 0 },
- { " .\n\n", 4, 0, 0, 0 },
- { " ;", 2, 0, 1, 1 },
- { " ,", 2, 0, 1, 0 },
- { NULL, 0, 0, 1, 0 },
- { " ", 1, 0, 0, 0 },
- { "[", 1, 0, 1, 1 },
- { "]", 1, 1, 0, 0 },
- { "(", 1, 0, 0, 0 },
- { NULL, 1, 0, 1, 0 },
- { ")", 1, 1, 0, 0 },
- { "\n", 1, 0, 1, 0 }
- };
-
- struct SerdWriterImpl {
- SerdSyntax syntax;
- SerdStyle style;
- SerdEnv* env;
- SerdNode root_node;
- SerdURI root_uri;
- SerdURI base_uri;
- SerdStack anon_stack;
- SerdBulkSink bulk_sink;
- SerdSink sink;
- void* stream;
- SerdErrorSink error_sink;
- void* error_handle;
- WriteContext context;
- SerdNode list_subj;
- unsigned list_depth;
- uint8_t* bprefix;
- size_t bprefix_len;
- unsigned indent;
- Sep last_sep;
- bool empty;
- };
-
- typedef enum {
- WRITE_STRING,
- WRITE_LONG_STRING
- } TextContext;
-
- static void
- w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
- {
- /* TODO: This results in errors with no file information, which is not
- helpful when re-serializing a file (particularly for "undefined
- namespace prefix" errors. The statement sink API needs to be changed to
- add a Cursor parameter so the source can notify the writer of the
- statement origin for better error reporting. */
-
- va_list args;
- va_start(args, fmt);
- const SerdError e = { st, NULL, 0, 0, fmt, &args };
- serd_error(writer->error_sink, writer->error_handle, &e);
- va_end(args);
- }
-
- static inline WriteContext*
- anon_stack_top(SerdWriter* writer)
- {
- assert(!serd_stack_is_empty(&writer->anon_stack));
- return (WriteContext*)(writer->anon_stack.buf
- + writer->anon_stack.size - sizeof(WriteContext));
- }
-
- static void
- copy_node(SerdNode* dst, const SerdNode* src)
- {
- if (src) {
- dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
- dst->n_bytes = src->n_bytes;
- dst->n_chars = src->n_chars;
- dst->flags = src->flags;
- dst->type = src->type;
- memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
- } else {
- dst->type = SERD_NOTHING;
- }
- }
-
- static inline size_t
- sink(const void* buf, size_t len, SerdWriter* writer)
- {
- if (len == 0) {
- return 0;
- } else if (writer->style & SERD_STYLE_BULK) {
- return serd_bulk_sink_write(buf, len, &writer->bulk_sink);
- } else {
- return writer->sink(buf, len, writer->stream);
- }
- }
-
- // Parse a UTF-8 character, set *size to the length, and return the code point
- static inline uint32_t
- parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
- {
- uint32_t c = 0;
- if ((utf8[0] & 0x80) == 0) { // Starts with `0'
- *size = 1;
- c = utf8[0];
- } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
- *size = 2;
- c = utf8[0] & 0x1F;
- } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
- *size = 3;
- c = utf8[0] & 0x0F;
- } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
- *size = 4;
- c = utf8[0] & 0x07;
- } else {
- w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
- *size = 0;
- return 0;
- }
-
- size_t i = 0;
- uint8_t in = utf8[i++];
-
- #define READ_BYTE() \
- in = utf8[i++] & 0x3F; \
- c = (c << 6) | in;
-
- switch (*size) {
- case 4: READ_BYTE();
- case 3: READ_BYTE();
- case 2: READ_BYTE();
- }
-
- return c;
- }
-
- // Write a single character, as an escape for single byte characters
- // (Caller prints any single byte characters that don't need escaping)
- static size_t
- write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
- {
- const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
- char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- const uint8_t in = utf8[0];
-
- uint32_t c = parse_utf8_char(writer, utf8, size);
- switch (*size) {
- case 0:
- w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in);
- return sink(replacement_char, sizeof(replacement_char), writer);
- case 1:
- snprintf(escape, sizeof(escape), "\\u%04X", in);
- return sink(escape, 6, writer);
- default:
- break;
- }
-
- if (!(writer->style & SERD_STYLE_ASCII)) {
- // Write UTF-8 character directly to UTF-8 output
- return sink(utf8, *size, writer);
- }
-
- if (c <= 0xFFFF) {
- snprintf(escape, sizeof(escape), "\\u%04X", c);
- return sink(escape, 6, writer);
- } else {
- snprintf(escape, sizeof(escape), "\\U%08X", c);
- return sink(escape, 10, writer);
- }
- }
-
- static inline bool
- uri_must_escape(const uint8_t c)
- {
- switch (c) {
- case ' ': case '"': case '<': case '>': case '\\':
- case '^': case '`': case '{': case '|': case '}':
- return true;
- default:
- return !in_range(c, 0x20, 0x7E);
- }
- }
-
- static size_t
- write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
- {
- size_t len = 0;
- for (size_t i = 0; i < n_bytes;) {
- size_t j = i; // Index of next character that must be escaped
- for (; j < n_bytes; ++j) {
- if (uri_must_escape(utf8[j])) {
- break;
- }
- }
-
- // Bulk write all characters up to this special one
- len += sink(&utf8[i], j - i, writer);
- if ((i = j) == n_bytes) {
- break; // Reached end
- }
-
- // Write UTF-8 character
- size_t size = 0;
- len += write_character(writer, utf8 + i, &size);
- i += size;
- }
- return len;
- }
-
- static bool
- lname_must_escape(const uint8_t c)
- {
- /* This arbitrary list of characters, most of which have nothing to do with
- Turtle, must be handled as special cases here because the RDF and SPARQL
- WGs are apparently intent on making the once elegant Turtle a baroque
- and inconsistent mess, throwing elegance and extensibility completely
- out the window for no good reason.
-
- Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
- in local names, so they are not escaped here. */
-
- switch (c) {
- case '\'': case '!': case '#': case '$': case '%': case '&':
- case '(': case ')': case '*': case '+': case ',': case '/':
- case ';': case '=': case '?': case '@': case '~':
- return true;
- }
- return false;
- }
-
- static size_t
- write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
- {
- size_t len = 0;
- for (size_t i = 0; i < n_bytes; ++i) {
- size_t j = i; // Index of next character that must be escaped
- for (; j < n_bytes; ++j) {
- if (lname_must_escape(utf8[j])) {
- break;
- }
- }
-
- // Bulk write all characters up to this special one
- len += sink(&utf8[i], j - i, writer);
- if ((i = j) == n_bytes) {
- break; // Reached end
- }
-
- // Write escape
- len += sink("\\", 1, writer);
- len += sink(&utf8[i], 1, writer);
- }
- return len;
- }
-
- static size_t
- write_text(SerdWriter* writer, TextContext ctx,
- const uint8_t* utf8, size_t n_bytes)
- {
- size_t len = 0;
- for (size_t i = 0; i < n_bytes;) {
- // Fast bulk write for long strings of printable ASCII
- size_t j = i;
- for (; j < n_bytes; ++j) {
- if (utf8[j] == '\\' || utf8[j] == '"'
- || (!in_range(utf8[j], 0x20, 0x7E))) {
- break;
- }
- }
-
- len += sink(&utf8[i], j - i, writer);
- if ((i = j) == n_bytes) {
- break; // Reached end
- }
-
- uint8_t in = utf8[i++];
- if (ctx == WRITE_LONG_STRING) {
- switch (in) {
- case '\\': len += sink("\\\\", 2, writer); continue;
- case '\b': len += sink("\\b", 2, writer); continue;
- case '\n': case '\r': case '\t': case '\f':
- len += sink(&in, 1, writer); // Write character as-is
- continue;
- case '\"':
- if (i == n_bytes) { // '"' at string end
- len += sink("\\\"", 2, writer);
- } else {
- len += sink(&in, 1, writer);
- }
- continue;
- default: break;
- }
- } else if (ctx == WRITE_STRING) {
- switch (in) {
- case '\\': len += sink("\\\\", 2, writer); continue;
- case '\n': len += sink("\\n", 2, writer); continue;
- case '\r': len += sink("\\r", 2, writer); continue;
- case '\t': len += sink("\\t", 2, writer); continue;
- case '"': len += sink("\\\"", 2, writer); continue;
- default: break;
- }
- if (writer->syntax != SERD_NTRIPLES) {
- switch (in) {
- case '\b': len += sink("\\b", 2, writer); continue;
- case '\f': len += sink("\\f", 2, writer); continue;
- }
- }
- }
-
- size_t size = 0;
- len += write_character(writer, utf8 + i - 1, &size);
-
- if (size == 0) {
- return len;
- }
-
- i += size - 1;
- }
- return len;
- }
-
- static size_t
- uri_sink(const void* buf, size_t len, void* stream)
- {
- return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
- }
-
- static void
- write_newline(SerdWriter* writer)
- {
- sink("\n", 1, writer);
- for (unsigned i = 0; i < writer->indent; ++i) {
- sink("\t", 1, writer);
- }
- }
-
- static void
- write_sep(SerdWriter* writer, const Sep sep)
- {
- const SepRule* rule = &rules[sep];
- if (rule->space_before) {
- write_newline(writer);
- }
- if (rule->str) {
- sink(rule->str, rule->len, writer);
- }
- if ( (writer->last_sep && rule->space_after_sep)
- || (!writer->last_sep && rule->space_after_node)) {
- write_newline(writer);
- } else if (writer->last_sep && rule->space_after_node) {
- sink(" ", 1, writer);
- }
- writer->last_sep = sep;
- }
-
- static SerdStatus
- reset_context(SerdWriter* writer, bool del)
- {
- if (del) {
- serd_node_free(&writer->context.graph);
- serd_node_free(&writer->context.subject);
- serd_node_free(&writer->context.predicate);
- writer->context = WRITE_CONTEXT_NULL;
- } else {
- writer->context.graph.type = SERD_NOTHING;
- writer->context.subject.type = SERD_NOTHING;
- writer->context.predicate.type = SERD_NOTHING;
- }
- writer->empty = false;
- return SERD_SUCCESS;
- }
-
- typedef enum {
- FIELD_NONE,
- FIELD_SUBJECT,
- FIELD_PREDICATE,
- FIELD_OBJECT
- } Field;
-
- static bool
- is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
- {
- return (writer->syntax != SERD_NTRIPLES &&
- ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
- (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
- }
-
- static bool
- write_node(SerdWriter* writer,
- const SerdNode* node,
- const SerdNode* datatype,
- const SerdNode* lang,
- Field field,
- SerdStatementFlags flags)
- {
- SerdChunk uri_prefix;
- SerdNode prefix;
- SerdChunk suffix;
- bool has_scheme;
- switch (node->type) {
- case SERD_BLANK:
- if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
- write_sep(writer, SEP_ANON_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
- && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
- assert(writer->list_depth == 0);
- copy_node(&writer->list_subj, node);
- ++writer->list_depth;
- ++writer->indent;
- write_sep(writer, SEP_LIST_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
- && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
- ++writer->indent;
- ++writer->list_depth;
- write_sep(writer, SEP_LIST_BEGIN);
- } else if (writer->syntax != SERD_NTRIPLES
- && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
- || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
- sink("[]", 2, writer);
- } else {
- sink("_:", 2, writer);
- if (writer->bprefix && !strncmp((const char*)node->buf,
- (const char*)writer->bprefix,
- writer->bprefix_len)) {
- sink(node->buf + writer->bprefix_len,
- node->n_bytes - writer->bprefix_len,
- writer);
- } else {
- sink(node->buf, node->n_bytes, writer);
- }
- }
- break;
- case SERD_CURIE:
- switch (writer->syntax) {
- case SERD_NTRIPLES:
- if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) {
- w_err(writer, SERD_ERR_BAD_CURIE,
- "undefined namespace prefix `%s'\n", node->buf);
- return false;
- }
- sink("<", 1, writer);
- write_uri(writer, uri_prefix.buf, uri_prefix.len);
- write_uri(writer, suffix.buf, suffix.len);
- sink(">", 1, writer);
- break;
- case SERD_TURTLE:
- if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
- write_sep(writer, SEP_ANON_BEGIN);
- sink("== ", 3, writer);
- }
- write_lname(writer, node->buf, node->n_bytes);
- if (is_inline_start(writer, field, flags)) {
- sink(" ;", 2, writer);
- write_newline(writer);
- }
- }
- break;
- case SERD_LITERAL:
- if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) {
- const char* type_uri = (const char*)datatype->buf;
- if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
- !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
- !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
- sink(node->buf, node->n_bytes, writer);
- break;
- } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
- !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
- strchr((const char*)node->buf, '.') &&
- node->buf[node->n_bytes - 1] != '.') {
- /* xsd:decimal literals without trailing digits, e.g. "5.", can
- not be written bare in Turtle. We could add a 0 which is
- prettier, but changes the text and breaks round tripping.
- */
- sink(node->buf, node->n_bytes, writer);
- break;
- }
- }
- if (writer->syntax != SERD_NTRIPLES
- && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
- sink("\"\"\"", 3, writer);
- write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
- sink("\"\"\"", 3, writer);
- } else {
- sink("\"", 1, writer);
- write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
- sink("\"", 1, writer);
- }
- if (lang && lang->buf) {
- sink("@", 1, writer);
- sink(lang->buf, lang->n_bytes, writer);
- } else if (datatype && datatype->buf) {
- sink("^^", 2, writer);
- write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
- }
- break;
- case SERD_URI:
- if (is_inline_start(writer, field, flags)) {
- ++writer->indent;
- write_sep(writer, SEP_ANON_BEGIN);
- sink("== ", 3, writer);
- }
- has_scheme = serd_uri_string_has_scheme(node->buf);
- if (field == FIELD_PREDICATE && (writer->syntax == SERD_TURTLE)
- && !strcmp((const char*)node->buf, NS_RDF "type")) {
- sink("a", 1, writer);
- break;
- } else if ((writer->syntax == SERD_TURTLE)
- && !strcmp((const char*)node->buf, NS_RDF "nil")) {
- sink("()", 2, writer);
- break;
- } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
- serd_env_qualify(writer->env, node, &prefix, &suffix)) {
- write_uri(writer, prefix.buf, prefix.n_bytes);
- sink(":", 1, writer);
- write_uri(writer, suffix.buf, suffix.len);
- break;
- }
- sink("<", 1, writer);
- if (writer->style & SERD_STYLE_RESOLVED) {
- SerdURI in_base_uri, uri, abs_uri;
- serd_env_get_base_uri(writer->env, &in_base_uri);
- serd_uri_parse(node->buf, &uri);
- serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
- bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
- SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
- if (!uri_is_under(&abs_uri, root) ||
- writer->syntax == SERD_NTRIPLES) {
- serd_uri_serialise(&abs_uri, uri_sink, writer);
- } else {
- serd_uri_serialise_relative(
- &uri, &writer->base_uri, root, uri_sink, writer);
- }
- } else {
- write_uri(writer, node->buf, node->n_bytes);
- }
- sink(">", 1, writer);
- if (is_inline_start(writer, field, flags)) {
- sink(" ;", 2, writer);
- write_newline(writer);
- }
- default:
- break;
- }
- writer->last_sep = SEP_NONE;
- return true;
- }
-
- static inline bool
- is_resource(const SerdNode* node)
- {
- return node->type > SERD_LITERAL;
- }
-
- static void
- write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
- {
- write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
- write_sep(writer, SEP_P_O);
- copy_node(&writer->context.predicate, pred);
- }
-
- static bool
- write_list_obj(SerdWriter* writer,
- SerdStatementFlags flags,
- const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* datatype,
- const SerdNode* lang)
- {
- if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
- --writer->indent;
- write_sep(writer, SEP_LIST_END);
- return true;
- } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
- write_sep(writer, SEP_LIST_SEP);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
- }
- return false;
- }
-
- SERD_API
- SerdStatus
- serd_writer_write_statement(SerdWriter* writer,
- SerdStatementFlags flags,
- const SerdNode* graph,
- const SerdNode* subject,
- const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* datatype,
- const SerdNode* lang)
- {
- if (!subject || !predicate || !object
- || !subject->buf || !predicate->buf || !object->buf
- || !is_resource(subject) || !is_resource(predicate)) {
- return SERD_ERR_BAD_ARG;
- }
-
- #define TRY(write_result) \
- if (!write_result) { \
- return SERD_ERR_UNKNOWN; \
- }
-
- switch (writer->syntax) {
- case SERD_NTRIPLES:
- TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
- sink(" ", 1, writer);
- TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
- sink(" ", 1, writer);
- TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
- sink(" .\n", 3, writer);
- return SERD_SUCCESS;
- default:
- break;
- }
-
- if ((flags & SERD_LIST_CONT)) {
- if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
- // Reached end of list
- if (--writer->list_depth == 0 && writer->list_subj.type) {
- reset_context(writer, true);
- writer->context.subject = writer->list_subj;
- writer->list_subj = SERD_NODE_NULL;
- }
- return SERD_SUCCESS;
- }
- } else if (serd_node_equals(subject, &writer->context.subject)) {
- if (serd_node_equals(predicate, &writer->context.predicate)) {
- // Abbreviate S P
- if (!(flags & SERD_ANON_O_BEGIN)) {
- ++writer->indent;
- }
- write_sep(writer, SEP_END_O);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
- if (!(flags & SERD_ANON_O_BEGIN)) {
- --writer->indent;
- }
- } else {
- // Abbreviate S
- Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
- write_sep(writer, sep);
- write_pred(writer, flags, predicate);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
- }
- } else {
- // No abbreviation
- if (writer->context.subject.type) {
- assert(writer->indent > 0);
- --writer->indent;
- if (serd_stack_is_empty(&writer->anon_stack)) {
- write_sep(writer, SEP_END_S);
- }
- } else if (!writer->empty) {
- write_sep(writer, SEP_S_P);
- }
-
- if (!(flags & SERD_ANON_CONT)) {
- write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
- ++writer->indent;
- write_sep(writer, SEP_S_P);
- } else {
- ++writer->indent;
- }
-
- reset_context(writer, true);
- copy_node(&writer->context.subject, subject);
-
- if (!(flags & SERD_LIST_S_BEGIN)) {
- write_pred(writer, flags, predicate);
- }
-
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
- }
-
- if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
- WriteContext* ctx = (WriteContext*)serd_stack_push(
- &writer->anon_stack, sizeof(WriteContext));
- *ctx = writer->context;
- WriteContext new_context = {
- serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
- if ((flags & SERD_ANON_S_BEGIN)) {
- new_context.predicate = serd_node_copy(predicate);
- }
- writer->context = new_context;
- } else {
- copy_node(&writer->context.graph, graph);
- copy_node(&writer->context.subject, subject);
- copy_node(&writer->context.predicate, predicate);
- }
-
- return SERD_SUCCESS;
- }
-
- SERD_API
- SerdStatus
- serd_writer_end_anon(SerdWriter* writer,
- const SerdNode* node)
- {
- if (writer->syntax == SERD_NTRIPLES) {
- return SERD_SUCCESS;
- }
- if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
- w_err(writer, SERD_ERR_UNKNOWN,
- "unexpected end of anonymous node\n");
- return SERD_ERR_UNKNOWN;
- }
- --writer->indent;
- write_sep(writer, SEP_ANON_END);
- reset_context(writer, true);
- writer->context = *anon_stack_top(writer);
- serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
- const bool is_subject = serd_node_equals(node, &writer->context.subject);
- if (is_subject) {
- copy_node(&writer->context.subject, node);
- writer->context.predicate.type = SERD_NOTHING;
- }
- return SERD_SUCCESS;
- }
-
- SERD_API
- SerdStatus
- serd_writer_finish(SerdWriter* writer)
- {
- if (writer->context.subject.type) {
- sink(" .\n", 3, writer);
- }
- if (writer->style & SERD_STYLE_BULK) {
- serd_bulk_sink_flush(&writer->bulk_sink);
- }
- writer->indent = 0;
- return reset_context(writer, true);
- }
-
- SERD_API
- SerdWriter*
- serd_writer_new(SerdSyntax syntax,
- SerdStyle style,
- SerdEnv* env,
- const SerdURI* base_uri,
- SerdSink ssink,
- void* stream)
- {
- const WriteContext context = WRITE_CONTEXT_NULL;
- SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
- writer->syntax = syntax;
- writer->style = style;
- writer->env = env;
- writer->root_node = SERD_NODE_NULL;
- writer->root_uri = SERD_URI_NULL;
- writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
- writer->anon_stack = serd_stack_new(sizeof(WriteContext));
- writer->sink = ssink;
- writer->stream = stream;
- writer->context = context;
- writer->list_subj = SERD_NODE_NULL;
- writer->empty = true;
- if (style & SERD_STYLE_BULK) {
- writer->bulk_sink = serd_bulk_sink_new(ssink, stream, SERD_PAGE_SIZE);
- }
- return writer;
- }
-
- SERD_API
- void
- serd_writer_set_error_sink(SerdWriter* writer,
- SerdErrorSink error_sink,
- void* error_handle)
- {
- writer->error_sink = error_sink;
- writer->error_handle = error_handle;
- }
-
- SERD_API
- void
- serd_writer_chop_blank_prefix(SerdWriter* writer,
- const uint8_t* prefix)
- {
- free(writer->bprefix);
- writer->bprefix_len = 0;
- writer->bprefix = NULL;
- if (prefix) {
- writer->bprefix_len = strlen((const char*)prefix);
- writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1);
- memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
- }
- }
-
- SERD_API
- SerdStatus
- serd_writer_set_base_uri(SerdWriter* writer,
- const SerdNode* uri)
- {
- if (!serd_env_set_base_uri(writer->env, uri)) {
- serd_env_get_base_uri(writer->env, &writer->base_uri);
-
- if (writer->syntax != SERD_NTRIPLES) {
- if (writer->context.graph.type || writer->context.subject.type) {
- sink(" .\n\n", 4, writer);
- reset_context(writer, false);
- }
- sink("@base <", 7, writer);
- sink(uri->buf, uri->n_bytes, writer);
- sink("> .\n", 4, writer);
- }
- writer->indent = 0;
- return reset_context(writer, false);
- }
- return SERD_ERR_UNKNOWN;
- }
-
- SERD_API
- SerdStatus
- serd_writer_set_root_uri(SerdWriter* writer,
- const SerdNode* uri)
- {
- serd_node_free(&writer->root_node);
- if (uri && uri->buf) {
- writer->root_node = serd_node_copy(uri);
- serd_uri_parse(uri->buf, &writer->root_uri);
- } else {
- writer->root_node = SERD_NODE_NULL;
- writer->root_uri = SERD_URI_NULL;
- }
- return SERD_SUCCESS;
- }
-
- SERD_API
- SerdStatus
- serd_writer_set_prefix(SerdWriter* writer,
- const SerdNode* name,
- const SerdNode* uri)
- {
- if (!serd_env_set_prefix(writer->env, name, uri)) {
- if (writer->syntax != SERD_NTRIPLES) {
- if (writer->context.graph.type || writer->context.subject.type) {
- sink(" .\n\n", 4, writer);
- reset_context(writer, false);
- }
- sink("@prefix ", 8, writer);
- sink(name->buf, name->n_bytes, writer);
- sink(": <", 3, writer);
- write_uri(writer, uri->buf, uri->n_bytes);
- sink("> .\n", 4, writer);
- }
- writer->indent = 0;
- return reset_context(writer, false);
- }
- return SERD_ERR_UNKNOWN;
- }
-
- SERD_API
- void
- serd_writer_free(SerdWriter* writer)
- {
- serd_writer_finish(writer);
- serd_stack_free(&writer->anon_stack);
- free(writer->bprefix);
- if (writer->style & SERD_STYLE_BULK) {
- serd_bulk_sink_free(&writer->bulk_sink);
- }
- serd_node_free(&writer->root_node);
- free(writer);
- }
-
- SERD_API
- SerdEnv*
- serd_writer_get_env(SerdWriter* writer)
- {
- return writer->env;
- }
-
- SERD_API
- size_t
- serd_file_sink(const void* buf, size_t len, void* stream)
- {
- return fwrite(buf, 1, len, (FILE*)stream);
- }
-
- SERD_API
- size_t
- serd_chunk_sink(const void* buf, size_t len, void* stream)
- {
- SerdChunk* chunk = (SerdChunk*)stream;
- chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
- memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
- chunk->len += len;
- return len;
- }
-
- SERD_API
- uint8_t*
- serd_chunk_sink_finish(SerdChunk* stream)
- {
- serd_chunk_sink("", 1, stream);
- return (uint8_t*)stream->buf;
- }
|