Support for <indexterm>s when reading DocBook by bigsmoke · Pull Request #7607 · jgm/pandoc · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Support for <indexterm>s when reading DocBook #7607

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions src/Text/Pandoc/Readers/DocBook.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import Data.Foldable (asum)
import Data.Generics
import Data.List (intersperse,elemIndex)
import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe,mapMaybe)
import Data.Maybe (catMaybes,fromMaybe,mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
Expand Down Expand Up @@ -316,7 +316,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] postcode - A postal code in an address
[x] preface - Introductory matter preceding the first chapter of a book
[ ] prefaceinfo - Meta-information for a Preface
[ ] primary - The primary word or phrase under which an index term should be
[x] primary - The primary word or phrase under which an index term should be
sorted
[ ] primaryie - A primary term in an index entry, not in the text
[ ] printhistory - The printing history of a document
Expand Down Expand Up @@ -385,7 +385,7 @@ List of all DocBook tags, with [x] indicating implemented,
[o] screeninfo - Information about how a screen shot was produced
[ ] screenshot - A representation of what the user sees or might see on a
computer screen
[ ] secondary - A secondary word or phrase in an index term
[x] secondary - A secondary word or phrase in an index term
[ ] secondaryie - A secondary term in an index entry, rather than in the text
[x] sect1 - A top-level section of document
[x] sect1info - Meta-information for a Sect1
Expand Down Expand Up @@ -461,7 +461,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] td - A table entry in an HTML table
[x] term - The word or phrase being defined or described in a variable list
[ ] termdef - An inline term definition
[ ] tertiary - A tertiary word or phrase in an index term
[x] tertiary - A tertiary word or phrase in an index term
[ ] tertiaryie - A tertiary term in an index entry, rather than in the text
[ ] textdata - Pointer to external text data
[ ] textobject - A wrapper for a text description of an object and its
Expand Down Expand Up @@ -1079,6 +1079,17 @@ elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x

-- | Turn a child element of @e@ into a key-value attribute pair.
--
-- The child is looked up with 'findChild' under the local name @n@ in the
-- DocBook namespace (@http://docbook.org/ns/docbook@). When such a child
-- exists, the result is @Just (n, text)@, where @text@ is whatever
-- 'strContentRecursive' yields for that child; otherwise it is 'Nothing'.
childElTextAsAttr :: Text -> Element -> Maybe (Text, Text)
childElTextAsAttr n e = toAttr <$> findChild docBookName e
  where
    -- Qualified name used for the lookup: local name @n@, DocBook namespace
    -- URI, no prefix.
    docBookName = QName n (Just "http://docbook.org/ns/docbook") Nothing
    -- Pair the requested name with the child's flattened text.
    toAttr child = (n, strContentRecursive child)

-- | Turn the XML attribute @n@ of element @e@ into a key-value attribute pair.
--
-- Returns 'Nothing' when 'attrValue' gives the empty text for @n@, and
-- @Just (n, value)@ otherwise.
attrValueAsOptionalAttr :: Text -> Element -> Maybe (Text, Text)
attrValueAsOptionalAttr n e
  | T.null value = Nothing
  | otherwise    = Just (n, value)
  where
    -- Look the attribute up once and reuse the result in both guards.
    value = attrValue n e

parseInline :: PandocMonad m => Content -> DB m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
Expand All @@ -1093,6 +1104,28 @@ parseInline (Elem e) =
if ident /= "" || classes /= []
then innerInlines (spanWith (ident,classes,[]))
else innerInlines id
"indexterm" -> do
let ident = attrValue "id" e
let classes = T.words $ attrValue "role" e
let attrs =
-- In DocBook, <primary>, <secondary>, <tertiary>, <see>, and <seealso>
-- have mixed content models. However, because we're representing these
-- elements in Pandoc's AST as attributes of a phrase, we flatten all
-- the descendant content of these elements.
[ childElTextAsAttr "primary" e
, childElTextAsAttr "secondary" e
, childElTextAsAttr "tertiary" e
, childElTextAsAttr "see" e
, childElTextAsAttr "seealso" e
, attrValueAsOptionalAttr "significance" e
, attrValueAsOptionalAttr "startref" e
, attrValueAsOptionalAttr "scope" e
, attrValueAsOptionalAttr "class" e
-- We don't do anything with the "pagenum" attribute, because these only
-- occur within literal <index> sections, which is not supported by Pandoc,
-- because Pandoc has no concept of pages.
]
return $ spanWith (ident, ("indexterm" : classes), (catMaybes attrs)) mempty
"equation" -> equation e displayMath
"informalequation" -> equation e displayMath
"inlineequation" -> equation e math
Expand Down
12 changes: 12 additions & 0 deletions test/docbook-reader.docbook
Original file line number Diff line number Diff line change
Expand Up @@ -1598,4 +1598,16 @@ or here: &lt;http://example.com/&gt;
</step>
</procedure>
</sect1>
<sect1 id="indexterms">
<title>Index terms</title>
<para>
In the simplest case, index terms<indexterm><primary>index term</primary></indexterm> consists of just a <code>&lt;primary&gt;</code> element, but <indexterm><primary>index term</primary><secondary>multi-level</secondary></indexterm> they can also consist of a <code>&lt;primary&gt;</code> <emph>and</emph> <code>&lt;secondary&gt;</code> element, and <indexterm><primary>index term</primary><secondary>multi-level</secondary><tertiary>3-level</tertiary></indexterm> can even include a <code>&lt;tertiary&gt;</code> term.
</para>
<para>
Index terms can also refer to other index terms: <indexterm><primary>index cross referencing</primary></indexterm><indexterm><primary>index term</primary><secondary>cross references</secondary><see>index cross referencing</see></indexterm>exclusively, using the <code>&lt;see&gt;</code> tag; or <indexterm><primary>index cross referencing</primary><seealso>cross referencing</seealso></indexterm> as a reference to related terms, using the <code>&lt;seealso&gt;</code> tag.
</para>
<para>
<indexterm><primary>food</primary><secondary>big <foreignphrase>baguette</foreignphrase> <strong>supreme</strong></secondary></indexterm>Nested content in index term elements is flattened.
</para>
</sect1>
</article>
187 changes: 187 additions & 0 deletions test/docbook-reader.native
Original file line number Diff line number Diff line change
Expand Up @@ -2927,4 +2927,191 @@ Pandoc
[ Str "A" , Space , Str "Final" , Space , Str "Step" ]
]
]
, Header
1
( "indexterms" , [] , [] )
[ Str "Index" , Space , Str "terms" ]
, Para
[ Str "In"
, Space
, Str "the"
, Space
, Str "simplest"
, Space
, Str "case,"
, Space
, Str "index"
, Space
, Str "terms"
, Span
( "" , [ "indexterm" ] , [ ( "primary" , "index term" ) ] )
[]
, Space
, Str "consists"
, Space
, Str "of"
, Space
, Str "just"
, Space
, Str "a"
, Space
, Code ( "" , [] , [] ) "<primary>"
, Space
, Str "element,"
, Space
, Str "but"
, Space
, Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "index term" )
, ( "secondary" , "multi-level" )
]
)
[]
, Space
, Str "they"
, Space
, Str "can"
, Space
, Str "also"
, Space
, Str "consist"
, Space
, Str "of"
, Space
, Str "a"
, Space
, Code ( "" , [] , [] ) "<primary>"
, Space
, Str "and"
, Space
, Code ( "" , [] , [] ) "<secondary>"
, Space
, Str "element,"
, Space
, Str "and"
, Space
, Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "index term" )
, ( "secondary" , "multi-level" )
, ( "tertiary" , "3-level" )
]
)
[]
, Space
, Str "can"
, Space
, Str "even"
, Space
, Str "include"
, Space
, Str "a"
, Space
, Code ( "" , [] , [] ) "<tertiary>"
, Space
, Str "term."
]
, Para
[ Str "Index"
, Space
, Str "terms"
, Space
, Str "can"
, Space
, Str "also"
, Space
, Str "refer"
, Space
, Str "to"
, Space
, Str "other"
, Space
, Str "index"
, Space
, Str "terms:"
, Space
, Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "index cross referencing" ) ]
)
[]
, Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "index term" )
, ( "secondary" , "cross references" )
, ( "see" , "index cross referencing" )
]
)
[]
, Str "exclusively,"
, Space
, Str "using"
, Space
, Str "the"
, Space
, Code ( "" , [] , [] ) "<see>"
, Space
, Str "tag;"
, Space
, Str "or"
, Space
, Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "index cross referencing" )
, ( "seealso" , "cross referencing" )
]
)
[]
, Space
, Str "as"
, Space
, Str "a"
, Space
, Str "reference"
, Space
, Str "to"
, Space
, Str "related"
, Space
, Str "terms,"
, Space
, Str "using"
, Space
, Str "the"
, Space
, Code ( "" , [] , [] ) "<seealso>"
, Space
, Str "tag."
]
, Para
[ Span
( ""
, [ "indexterm" ]
, [ ( "primary" , "food" )
, ( "secondary" , "big baguette supreme" )
]
)
[]
, Str "Nested"
, Space
, Str "content"
, Space
, Str "in"
, Space
, Str "index"
, Space
, Str "term"
, Space
, Str "elements"
, Space
, Str "is"
, Space
, Str "flattened."
]
]
0