From b38bfe756fa0121671e46bf8fbcdd79433ca01d9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jan 2015 11:13:46 -0800 Subject: [PATCH 1/3] Added scaffolding for mom writer. Currently it just outputs man format. --- man/man1/cmark.1 | 2 +- man/man3/cmark.3 | 8 +- src/CMakeLists.txt | 1 + src/cmark.h | 5 + src/main.c | 8 +- src/mom.c | 249 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 src/mom.c diff --git a/man/man1/cmark.1 b/man/man1/cmark.1 index c425b8ca..340822bf 100644 --- a/man/man1/cmark.1 +++ b/man/man1/cmark.1 @@ -21,7 +21,7 @@ concatenated before parsing. .SH "OPTIONS" .TP 12n \-\-to, \-t \f[I]FORMAT\f[] -Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]). +Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]mom\f[], \f[C]xml\f[]). .TP 12n \-\-sourcepos Include source position attribute. diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 5df89c33..c7a991ec 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "January 11, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "January 19, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -465,6 +465,12 @@ to add an appropriate header and footer. .PP Render a \f[I]node\f[] tree as a groff man page, without the header. +.PP +\fIchar *\f[] \fBcmark_render_mom\f[](\fIcmark_node *root\f[], \fIlong options\f[]) + +.PP +Render a \f[I]node\f[] tree in groff mom format, without the header. 
+ .PP .nf \fC diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 254c065a..d8303a38 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(LIBRARY_SOURCES buffer.c references.c man.c + mom.c xml.c html.c html_unescape.gperf diff --git a/src/cmark.h b/src/cmark.h index 04ca6d72..b09b6054 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -466,6 +466,11 @@ char *cmark_render_html(cmark_node *root, long options); CMARK_EXPORT char *cmark_render_man(cmark_node *root, long options); +/** Render a 'node' tree in groff mom format, without the header. + */ +CMARK_EXPORT +char *cmark_render_mom(cmark_node *root, long options); + /** Default writer options. */ #define CMARK_OPT_DEFAULT 0 diff --git a/src/main.c b/src/main.c index 58558689..d623caa0 100644 --- a/src/main.c +++ b/src/main.c @@ -17,13 +17,14 @@ typedef enum { FORMAT_HTML, FORMAT_XML, FORMAT_MAN, + FORMAT_MOM } writer_format; void print_usage() { printf("Usage: cmark [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, xml, man)\n"); + printf(" --to, -t FORMAT Specify output format (html, xml, man, mom)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --normalize Consolidate adjacent text nodes\n"); @@ -45,6 +46,9 @@ static void print_document(cmark_node *document, writer_format writer, case FORMAT_MAN: result = cmark_render_man(document, options); break; + case FORMAT_MOM: + result = cmark_render_mom(document, options); + break; default: fprintf(stderr, "Unknown format %d\n", writer); exit(1); @@ -94,6 +98,8 @@ int main(int argc, char *argv[]) writer = FORMAT_MAN; } else if (strcmp(argv[i], "html") == 0) { writer = FORMAT_HTML; + } else if (strcmp(argv[i], "mom") == 0) { + writer = FORMAT_MOM; } else if (strcmp(argv[i], "xml") == 0) { writer = FORMAT_XML; } else { diff --git a/src/mom.c b/src/mom.c new file mode 100644 index 00000000..9412eaa9 --- /dev/null +++ 
b/src/mom.c @@ -0,0 +1,249 @@ +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" + +// Functions to convert cmark_nodes to groff mom strings. + +static void escape_mom(cmark_strbuf *dest, const unsigned char *source, int length) +{ + int i; + unsigned char c; + + for (i = 0; i < length; i++) { + c = source[i]; + if (c == '.' && i == 0) { + cmark_strbuf_puts(dest, "\\&."); + } else if (c == '\'' && i == 0) { + cmark_strbuf_puts(dest, "\\&'"); + } else if (c == '-') { + cmark_strbuf_puts(dest, "\\-"); + } else if (c == '\\') { + cmark_strbuf_puts(dest, "\\e"); + } else { + cmark_strbuf_putc(dest, source[i]); + } + } +} + +static inline void cr(cmark_strbuf *mom) +{ + if (mom->size && mom->ptr[mom->size - 1] != '\n') + cmark_strbuf_putc(mom, '\n'); +} + +struct render_state { + cmark_strbuf* mom; + cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, cmark_event_type ev_type, + struct render_state *state) +{ + cmark_node *tmp; + cmark_strbuf *mom = state->mom; + int list_number; + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (state->plain == node) { // back at original node + state->plain = NULL; + } + + if (state->plain != NULL) { + switch(node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_CODE: + escape_mom(mom, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: + cmark_strbuf_putc(mom, ' '); + break; + + default: + break; + } + return 1; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + cr(mom); + cmark_strbuf_puts(mom, ".RS"); + cr(mom); + } else { + cr(mom); + cmark_strbuf_puts(mom, ".RE"); + cr(mom); + } + break; + + case CMARK_NODE_LIST: + break; + + case CMARK_NODE_ITEM: + if (entering) { + cr(mom); + cmark_strbuf_puts(mom, ".IP "); + if (cmark_node_get_list_type(node->parent) == + CMARK_BULLET_LIST) { + 
cmark_strbuf_puts(mom, "\\[bu] 2"); + } else { + list_number = cmark_node_get_list_start(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + cmark_strbuf_printf(mom, "\"%d.\" 4", list_number); + } + cr(mom); + } else { + cr(mom); + } + break; + + case CMARK_NODE_HEADER: + if (entering) { + cr(mom); + cmark_strbuf_puts(mom, + cmark_node_get_header_level(node) == 1 ? + ".SH" : ".SS"); + cr(mom); + } else { + cr(mom); + } + break; + + case CMARK_NODE_CODE_BLOCK: + cr(mom); + cmark_strbuf_puts(mom, ".IP\n.nf\n\\f[C]\n"); + escape_mom(mom, node->as.code.literal.data, + node->as.code.literal.len); + cr(mom); + cmark_strbuf_puts(mom, "\\f[]\n.fi"); + cr(mom); + break; + + case CMARK_NODE_HTML: + break; + + case CMARK_NODE_HRULE: + cr(mom); + cmark_strbuf_puts(mom, ".PP\n * * * * *"); + cr(mom); + break; + + case CMARK_NODE_PARAGRAPH: + if (entering) { + // no blank line if first paragraph in list: + if (node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->prev == NULL) { + // no blank line or .PP + } else { + cr(mom); + cmark_strbuf_puts(mom, ".PP\n"); + } + } else { + cr(mom); + } + break; + + case CMARK_NODE_TEXT: + escape_mom(mom, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + cmark_strbuf_puts(mom, ".PD 0\n.P\n.PD"); + cr(mom); + break; + + case CMARK_NODE_SOFTBREAK: + cmark_strbuf_putc(mom, '\n'); + break; + + case CMARK_NODE_CODE: + cmark_strbuf_puts(mom, "\\f[C]"); + escape_mom(mom, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(mom, "\\f[]"); + break; + + case CMARK_NODE_INLINE_HTML: + break; + + case CMARK_NODE_STRONG: + if (entering) { + cmark_strbuf_puts(mom, "\\f[B]"); + } else { + cmark_strbuf_puts(mom, "\\f[]"); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + cmark_strbuf_puts(mom, "\\f[I]"); + } else { + cmark_strbuf_puts(mom, "\\f[]"); + } + break; + + case CMARK_NODE_LINK: + if (!entering) { + cmark_strbuf_printf(mom, " (%s)", 
+ cmark_node_get_url(node)); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + cmark_strbuf_puts(mom, "[IMAGE: "); + state->plain = node; + } else { + cmark_strbuf_puts(mom, "]"); + } + break; + + default: + assert(false); + break; + } + + // cmark_strbuf_putc(mom, 'x'); + return 1; +} + +char *cmark_render_mom(cmark_node *root, long options) +{ + char *result; + cmark_strbuf mom = GH_BUF_INIT; + struct render_state state = { &mom, NULL }; + cmark_node *cur; + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(root); + + if (options == 0) options = 0; // avoid warning about unused parameters + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state); + } + result = (char *)cmark_strbuf_detach(&mom); + + cmark_iter_free(iter); + cmark_strbuf_free(&mom); + return result; +} From dec67e41b0559d99c647937c25d02d31573bc5e5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jan 2015 12:01:20 -0800 Subject: [PATCH 2/3] Implemented mom output. TODO: links, images. 
--- src/mom.c | 60 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/src/mom.c b/src/mom.c index 9412eaa9..f4dc2c0a 100644 --- a/src/mom.c +++ b/src/mom.c @@ -46,9 +46,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, struct render_state *state) { - cmark_node *tmp; cmark_strbuf *mom = state->mom; - int list_number; + int list_start; + int list_delim; bool entering = (ev_type == CMARK_EVENT_ENTER); if (state->plain == node) { // back at original node @@ -81,34 +81,39 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_BLOCK_QUOTE: if (entering) { cr(mom); - cmark_strbuf_puts(mom, ".RS"); + cmark_strbuf_puts(mom, ".BLOCKQUOTE"); cr(mom); } else { cr(mom); - cmark_strbuf_puts(mom, ".RE"); + cmark_strbuf_puts(mom, ".BLOCKQUOTE OFF"); cr(mom); } break; case CMARK_NODE_LIST: + cr(mom); + if (cmark_node_get_list_type(node) == + CMARK_BULLET_LIST) { + cmark_strbuf_puts(mom, ".LIST BULLET"); + } else { + list_start = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + cmark_strbuf_printf(mom, ".LIST DIGIT %s", + list_delim == CMARK_PAREN_DELIM ? 
+ ")" : "."); + if (list_start != 1) { + cr(mom); + cmark_strbuf_printf(mom, ".RESET_LIST %d", list_start); + cr(mom); + } + } + cr(mom); break; case CMARK_NODE_ITEM: if (entering) { cr(mom); - cmark_strbuf_puts(mom, ".IP "); - if (cmark_node_get_list_type(node->parent) == - CMARK_BULLET_LIST) { - cmark_strbuf_puts(mom, "\\[bu] 2"); - } else { - list_number = cmark_node_get_list_start(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } - cmark_strbuf_printf(mom, "\"%d.\" 4", list_number); - } + cmark_strbuf_puts(mom, ".ITEM"); cr(mom); } else { cr(mom); @@ -118,22 +123,22 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_HEADER: if (entering) { cr(mom); - cmark_strbuf_puts(mom, - cmark_node_get_header_level(node) == 1 ? - ".SH" : ".SS"); - cr(mom); + cmark_strbuf_printf(mom, + ".HEADING %d \"", + cmark_node_get_header_level(node)); } else { + cmark_strbuf_printf(mom, "\""); cr(mom); } break; case CMARK_NODE_CODE_BLOCK: cr(mom); - cmark_strbuf_puts(mom, ".IP\n.nf\n\\f[C]\n"); + cmark_strbuf_puts(mom, ".QUOTE\n.CODE\n"); escape_mom(mom, node->as.code.literal.data, node->as.code.literal.len); cr(mom); - cmark_strbuf_puts(mom, "\\f[]\n.fi"); + cmark_strbuf_puts(mom, ".CODE OFF\n.QUOTE OFF"); cr(mom); break; @@ -142,7 +147,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_HRULE: cr(mom); - cmark_strbuf_puts(mom, ".PP\n * * * * *"); + cmark_strbuf_puts(mom, ".DRH"); cr(mom); break; @@ -155,7 +160,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, // no blank line or .PP } else { cr(mom); - cmark_strbuf_puts(mom, ".PP\n"); + cmark_strbuf_puts(mom, ".PP"); + cr(mom); } } else { cr(mom); @@ -168,7 +174,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_LINEBREAK: - cmark_strbuf_puts(mom, ".PD 0\n.P\n.PD"); + cmark_strbuf_puts(mom, ".LINEBREAK"); cr(mom); break; @@ -177,7 +183,7 @@ S_render_node(cmark_node *node, cmark_event_type 
ev_type, break; case CMARK_NODE_CODE: - cmark_strbuf_puts(mom, "\\f[C]"); + cmark_strbuf_puts(mom, "\\f[CR]"); escape_mom(mom, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(mom, "\\f[]"); break; From 01e7cfa20bbabe2d9b8f3f0b3f6edac9c7a3bdc7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jan 2015 20:38:20 -0800 Subject: [PATCH 3/3] Fixed escaping and lists in mom writer. --- src/mom.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/mom.c b/src/mom.c index f4dc2c0a..b2704064 100644 --- a/src/mom.c +++ b/src/mom.c @@ -14,12 +14,13 @@ static void escape_mom(cmark_strbuf *dest, const unsigned char *source, int leng { int i; unsigned char c; + bool beginLine = true; for (i = 0; i < length; i++) { c = source[i]; - if (c == '.' && i == 0) { + if (c == '.' && beginLine) { cmark_strbuf_puts(dest, "\\&."); - } else if (c == '\'' && i == 0) { + } else if (c == '\'' && beginLine) { cmark_strbuf_puts(dest, "\\&'"); } else if (c == '-') { cmark_strbuf_puts(dest, "\\-"); @@ -28,6 +29,7 @@ static void escape_mom(cmark_strbuf *dest, const unsigned char *source, int leng } else { cmark_strbuf_putc(dest, source[i]); } + beginLine = (c == '\n'); } } @@ -92,20 +94,23 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LIST: cr(mom); - if (cmark_node_get_list_type(node) == + if (entering) { + if (cmark_node_get_list_type(node) == CMARK_BULLET_LIST) { - cmark_strbuf_puts(mom, ".LIST BULLET"); - } else { - list_start = cmark_node_get_list_start(node->parent); - list_delim = cmark_node_get_list_delim(node->parent); - cmark_strbuf_printf(mom, ".LIST DIGIT %s", - list_delim == CMARK_PAREN_DELIM ? 
- ")" : "."); - if (list_start != 1) { - cr(mom); - cmark_strbuf_printf(mom, ".RESET_LIST %d", list_start); - cr(mom); + cmark_strbuf_puts(mom, ".LIST BULLET"); + } else { + list_start = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + cmark_strbuf_printf(mom, ".LIST DIGIT %s", + list_delim == CMARK_PAREN_DELIM ? + ")" : "."); + if (list_start != 1) { + cr(mom); + cmark_strbuf_printf(mom, ".RESET_LIST %d", list_start); + } } + } else { + cmark_strbuf_puts(mom, ".LIST OFF"); } cr(mom); break;