635 lines
16 KiB
Ruby
635 lines
16 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module RBMark
|
|
# Module for representing parsing-related constructs
|
|
module Parsing
|
|
# Abstract scanner interface.
#
# Keeps the list of variants a concrete scanner works with and declares the
# +scan+ entry point that subclasses are expected to override.
class Scanner
  # @return [Array] variants available to this scanner
  attr_accessor :variants

  # Start with an empty variant list.
  def initialize
    @variants = []
  end

  # Scan text
  # @param text [String]
  # @return [Array<RBMark::DOM::DOMObject>]
  # @raise [StandardError] always; the base class has no implementation
  def scan(_text)
    raise StandardError, "Abstract method called"
  end
end
|
|
|
|
# Line-level scanner for blocks.
#
# Walks the text one line at a time and lets the registered block variants
# decide where a block begins (+begin?+) and where it ends (+end?+). Every
# entry of +variants+ is a +[variant_instance, priority]+ pair.
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  # @param buffer [String] text already accumulated for the block in progress
  # @param blocks [Array<RBMark::DOM::DOMObject>] blocks collected so far
  # @param mode [RBMark::Parsing::BlockVariant, nil] variant of the block in progress
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    lines = text.lines
    lines.each_with_index do |line, index|
      buffer += line
      ahead = lines[index + 1]
      blocks, buffer, mode = try_begin(line, blocks, buffer, mode,
                                       lookahead: ahead)
      if mode.nil?
        # No variant claimed this line (e.g. a blank line between blocks).
        # Drop it: left in the buffer it would be glued in front of the next
        # block, and a fenced code block would then mistake its own opening
        # fence for the closing one.
        buffer = ""
        next
      end
      next unless mode.end?(line, lookahead: ahead, blocks: blocks, buffer: buffer)

      blocks, buffer, mode = flush(blocks, buffer, mode)
    end
    # Whatever is still buffered at end of input is flushed as a final block.
    flush(blocks, buffer, mode)[0]
  end

  # Predict mode for given line
  # @param line [String]
  # @param message [Hash] extra keyword arguments forwarded to +begin?+
  # @return [RBMark::Parsing::BlockVariant, nil] first variant (in list order)
  #   whose +begin?+ accepts the line
  def select_mode(line, **message)
    @variants.find { |variant| variant[0].begin?(line, **message) }&.at(0)
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, Object)]
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    # A block is already in progress; nothing to open.
    return blocks, buffer, mode if mode

    mode = select_mode(line, lookahead: lookahead,
                             blocks: blocks,
                             buffer: buffer)
    blocks.append(mode.begin(line)) if mode.respond_to?(:begin)
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @raise [StandardError] if an entry does not hold a BlockVariant
  # @return [void]
  def prepare
    @variants.each do |variant|
      unless variant[0].is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{variant} is not a BlockVariant"
      end

      variant[0].parent = self
    end
    # sort_by is not guaranteed to be stable, so the registration index is
    # used as a tie-breaker to keep equal-priority variants in order.
    ordered = @variants.each_with_index
                       .sort_by { |variant, index| [variant.last, index] }
                       .map(&:first)
    @variants.replace(ordered)
  end

  # Flush the buffer using given mode
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    return [blocks, "", nil] if buffer == ""

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush)
    # A variant that defines restructure decides the next state itself.
    return mode.restructure(blocks, buffer, mode) if mode.respond_to?(:restructure)

    [blocks, "", nil]
  end
end
|
|
|
|
# Abstract variant interface: common ancestor of all parsing variants.
class Variant
end
|
|
|
|
# Abstract block-level variant.
#
# Subclasses must implement +begin?+ and +end?+. The remaining hooks
# documented below are optional; they are only described here, not defined.
class BlockVariant < Variant
  # @return [Object] owner of this variant (assigned through the writer)
  attr_accessor :parent

  # Check if a block begins on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option [String, nil] :lookahead next line over
  # @option [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def begin?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # Check if a block ends on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option [String, nil] :lookahead next line over
  # @option [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def end?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # @!method begin(buffer)
  #   Open a block to be later filled in by BlockVariant#end
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method end(block, buffer)
  #   Finalize a block opened by begin
  #   @param block [::RBMark::DOM::DOMObject]
  #   @param buffer [String]
  #   @return [void]

  # @!method flush(buffer)
  #   Flush buffer and create a new DOM object
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method restructure(blocks, buffer, mode)
  #   Restructure current set of blocks (if method is defined)
  #   @param blocks [Array<::RBMark::DOM::DOMObject>]
  #   @param buffer [String]
  #   @param mode [::RBMark::Parsing::Variant]
  #   @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
end
|
|
|
|
# Paragraph breaking variant.
#
# Subclasses must implement +match+; +preprocess+ is an optional hook that is
# only documented here.
class BreakerVariant < BlockVariant
  # Check that a paragraph matches the breaker
  # @param buffer [String]
  # @return [Class, nil]
  # @raise [StandardError] unless overridden
  def match(_buffer)
    raise StandardError, "Abstract method called"
  end

  # @!method preprocess(buffer)
  #   Preprocess buffer
  #   @param buffer [String]
  #   @return [String]
end
|
|
|
|
# Paragraph variant.
#
# Starts on any line with visible content and runs until a paragraph breaker
# or the start of another kind of block. Relies on +parent+ providing
# +variants+ and +select_mode+.
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  def end?(line, lookahead: nil, **_opts)
    return true if check_paragraph_breakers(line)
    # Last line of input: nothing left to continue with.
    return true unless lookahead
    # The next line is itself a breaker; keep going so that it is consumed
    # as part of this paragraph and ends it on the next iteration.
    return false if check_paragraph_breakers(lookahead)
    # An indented line continues the paragraph instead of opening an indented
    # block. A tab counts as indentation too, matching
    # IndentedBlockVariant#begin?.
    return false if lookahead.match?(/^(?: {4}|\t)/)

    !parent.select_mode(lookahead).is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # @sg-ignore
  def flush(buffer)
    # The first breaker that recognises the buffer decides the resulting
    # class and may rewrite the buffer before it is parsed.
    breakers.each do |breaker|
      dom_class = breaker.match(buffer)
      next unless dom_class

      buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess)
      return dom_class.parse(buffer.strip)
    end
    ::RBMark::DOM::Paragraph.parse(buffer.strip)
  end

  private

  # @return [Array<BreakerVariant>] breaker variants known to the parent
  def breakers
    parent.variants.filter_map do |x|
      x[0] if x[0].is_a? ::RBMark::Parsing::BreakerVariant
    end
  end

  # @param line [String]
  # @return [Boolean] whether any breaker accepts the line as a paragraph break
  def check_paragraph_breakers(line)
    breakers.any? { |x| x.begin?(line, breaks_paragraph: true) }
  end
end
|
|
|
|
# Thematic break variant: a single line made only of spaces and one kind of
# marker (-, _ or *), producing a horizontal rule.
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    # Whole line consists of spaces and a single marker character.
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    # First marker is preceded by at most three spaces.
    return false unless line.match?(/^ {0,3}[-_*]/)

    # At least three markers are present.
    %w[- _ *].any? { |marker| line.count(marker) >= 3 }
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
|
|
|
|
# ATX Heading variant: a single line opened by one to six # characters.
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    lvl, content = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/)[1..2]
    # Strip an optional closing run of # characters (which must be preceded
    # by a space) together with any spaces trailing it.
    content = content.sub(/(?: #+)? *\z/, "")
    heading(lvl).parse(content.strip)
  end

  private

  # @param lvl [String] the opening run of # characters
  # @return [Class, nil] heading class for the given level
  def heading(lvl)
    case lvl.length
    when 1 then ::RBMark::DOM::Heading1
    when 2 then ::RBMark::DOM::Heading2
    when 3 then ::RBMark::DOM::Heading3
    when 4 then ::RBMark::DOM::Heading4
    when 5 then ::RBMark::DOM::Heading5
    when 6 then ::RBMark::DOM::Heading6
    end
  end
end
|
|
|
|
# Paragraph closing variant: a blank line ends the paragraph it follows.
class BlankSeparator < BreakerVariant
  # (see BlockVariant#begin?)
  # @param breaks_paragraph [Boolean, nil] set when asked whether the line
  #   breaks a paragraph; without it the variant never matches
  def begin?(line, breaks_paragraph: nil, **_opts)
    # A blank line is a line holding nothing but spaces and tabs, however
    # many of them there are.
    breaks_paragraph &&
      line.match?(/\A[ \t]*$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # @return [nil] always; a blank line does not change the paragraph class
  def match(_buffer)
    nil
  end
end
|
|
|
|
# Setext heading variant: a paragraph underlined with a run of = or -
# characters.
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  # @param breaks_paragraph [Boolean, nil] set when asked whether the line
  #   breaks a paragraph; without it the variant never matches
  def begin?(line, breaks_paragraph: nil, **_opts)
    breaks_paragraph &&
      line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  def match(buffer)
    # An underline with no text above it is not a heading.
    title = preprocess(buffer)
    return nil unless title.match?(/\S/)

    underline = buffer.lines.last
    heading(underline)
  end

  # (see BreakerVariant#preprocess)
  # @return [String] the buffer without its last line
  def preprocess(buffer)
    buffer.lines[..-2].join
  end

  private

  # @param buffer [String] the underline
  # @return [Class, nil] heading class selected by the underline character
  def heading(buffer)
    case buffer
    when /^ {0,3}=+ *$/ then ::RBMark::DOM::Heading1
    when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2
    end
  end
end
|
|
|
|
# Indented literal block variant: lines indented by four spaces or a tab.
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  def end?(_line, lookahead: nil, **_opts)
    # The block goes on while the next line is indented (by four spaces or by
    # a tab, same as begin?) or contains only whitespace.
    !lookahead&.match?(/^(?: {4}.*|\t.*|\s*)$/)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    text = buffer.lines.map { |line| "#{dedent(line)}\n" }.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end

  private

  # Remove one level of indentation from a line
  # @param line [String]
  # @return [String] line without its line ending and leading indentation
  def dedent(line)
    line = line.chomp
    # A tab is a single character; cutting four would eat into the content.
    return line[1..] if line.start_with?("\t")

    # Lines shorter than the indentation (blank lines) become empty.
    line[4..].to_s
  end
end
|
|
|
|
# Fenced code block: text enclosed between lines of backticks or tildes.
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  def end?(line, blocks: nil, buffer: nil, **_opts)
    # The opening line alone can never close the block.
    buffer.lines.length > 1 &&
      line.strip == blocks.last.properties[:expected_closer]
  end

  # (see BlockVariant#begin)
  # @param buffer [String] the opening fence line
  def begin(buffer)
    fence, info = buffer.match(/^(`{3,}|~{3,})(.*)$/).captures
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = fence
    block.properties[:infostring] = info.strip
    block
  end

  # (see BlockVariant#end)
  # @param block [::RBMark::DOM::DOMObject] block created by {#begin}
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lines.drop(1)
    # Only drop the last line when it really is the closing fence: a block
    # left open until the end of input has no closer, and its last line is
    # content that must be kept.
    body.pop if body.last&.strip == closer
    block.content = body.join
  end
end
|
|
end
|
|
|
|
# Module for representing abstract object hierarchy
|
|
module DOM
|
|
# Abstract container: base class of the nodes in the document tree.
#
# Class-level state (variants, scanner class, atomic flag) configures how
# +parse+ turns text into a node; instances hold text content, child nodes
# and a properties hash.
class DOMObject
  class << self
    attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode

    # Hook for initializing variables: registers the subclass and hands it a
    # copy of the current parsing configuration.
    # @param subclass [Class]
    # @return [void]
    def inherited(subclass)
      super
      (@subclasses ||= []).append(subclass)
      subclass.variants = @variants ? @variants.dup : []
      subclass.atomic_mode = @atomic_mode
      subclass.scanner_class = @scanner_class
    end

    # Add potential sub-element variant (also to every known subclass)
    # @param cls [Class] ::RBMark::Parsing::Variant subclass
    # @param prio [Integer] priority stored alongside the variant
    # @raise [StandardError] if cls is not a Variant subclass
    def variant(cls, prio: 1)
      unless cls < ::RBMark::Parsing::Variant
        raise StandardError, "#{cls} is not a Variant subclass"
      end

      # Only subclasses get a list from the inherited hook.
      @variants ||= []
      @variants.append([cls, prio])
      @subclasses&.each do |subclass|
        subclass.variant(cls, prio: prio)
      end
    end

    # Set scanner class (also on every known subclass)
    # @param cls [Class] ::RBMark::Parsing::Scanner subclass
    # @raise [StandardError] if cls is not a Scanner subclass
    def scanner(cls)
      unless cls < ::RBMark::Parsing::Scanner
        raise StandardError, "#{cls} is not a Scanner subclass"
      end

      @scanner_class = cls
      @subclasses&.each do |subclass|
        subclass.scanner(cls)
      end
    end

    # Prepare scanner and variants
    #
    # The scanner is built once and reused; it is rebuilt only when the
    # scanner class or the variant list differs from what it was built from.
    # @raise [StandardError] if no scanner class has been set
    # @return [void]
    def prepare
      configuration = [@scanner_class, (@variants || []).dup]
      return if @prepared == configuration

      raise StandardError, "#{self} has no scanner class" unless @scanner_class

      @scanner = @scanner_class.new
      @scanner.variants = configuration.last.map { |x| [x[0].new, x[1]] }
      @prepared = configuration
    end

    # Parse text from the given context
    #
    # Atomic classes store the text as content; other classes run it through
    # the scanner and append the resulting nodes as children.
    # @param text [String]
    # @return [self]
    def parse(text)
      instance = new
      if @atomic_mode
        instance.content = text
      else
        prepare
        instance.append(*@scanner.scan(text))
      end
      instance
    end

    # Create a new instance of class or referenced class
    # @return [self, Class]
    def create
      (@alt_for || self).new
    end

    # Set the atomic flag
    # @return [void]
    def atomic
      @atomic_mode = true
    end
  end

  # Get text content, children and properties of a DOMObject
  # @return [String, nil]
  attr_reader :content, :children, :properties

  def initialize
    @content = nil
    @children = []
    @properties = {}
  end

  # Set certain property in the properties hash
  # @param properties [Hash] properties to update
  def property(**properties)
    @properties.update(**properties)
  end

  # Add children to container
  # @param children [Array<DOMObject>]
  # @raise [StandardError] if any argument is not a DOMObject
  def append(*children)
    unless children.all? { |x| x.is_a? DOMObject }
      raise StandardError, "one of #{children.inspect} is not a DOMObject"
    end

    @children.append(*children)
  end

  # Insert a child into the container
  # @param index [Integer]
  # @param child [DOMObject]
  # @raise [StandardError] if child is not a DOMObject
  def insert(index, child)
    raise StandardError, "not a DOMObject" unless child.is_a? DOMObject

    @children.insert(index, child)
  end

  # Delete a child from container
  # @param index [Integer]
  def delete_at(index)
    @children.delete_at(index)
  end

  # Get a child from the container
  # @param key [Integer]
  def [](key)
    @children[key]
  end

  # Set text content of a DOMObject
  # @param text [String]
  # @raise [StandardError] if text is not a String
  def content=(text)
    raise StandardError, "not a String" unless text.is_a? String

    @content = text
  end
end
|
|
|
|
# Inline text node.
class Text < DOMObject
end
|
|
|
|
# Inline preformatted text node.
class InlinePre < DOMObject
end
|
|
|
|
# Inline formattable text; marked atomic, so parsing stores the text as
# content.
class InlineFormattable < DOMObject
  atomic
end
|
|
|
|
# Bold inline text.
class InlineBold < InlineFormattable
end
|
|
|
|
# Italic inline text.
class InlineItalics < InlineFormattable
end
|
|
|
|
# Italic inline text (alternative form).
class InlineAltItalics < InlineFormattable
end
|
|
|
|
# Underlined inline text.
class InlineUnder < InlineFormattable
end
|
|
|
|
# Struck-through inline text.
class InlineStrike < InlineFormattable
end
|
|
|
|
# Hyperlinked inline text.
class InlineLink < InlineFormattable
end
|
|
|
|
# Inline image.
class InlineImage < InlinePre
end
|
|
|
|
# Line break node.
class InlineBreak < DOMObject
end
|
|
|
|
# Document root.
#
# Scanned with the line scanner. Block variants are registered with the
# default priority, followed by the blank separator and the paragraph with
# explicit, much larger priority numbers.
class Document < DOMObject
  scanner ::RBMark::Parsing::LineScanner

  [
    ::RBMark::Parsing::ATXHeadingVariant,
    ::RBMark::Parsing::ThematicBreakVariant,
    ::RBMark::Parsing::SetextHeadingVariant,
    ::RBMark::Parsing::IndentedBlockVariant,
    ::RBMark::Parsing::FencedCodeBlock
  ].each { |block_variant| variant block_variant }

  variant ::RBMark::Parsing::BlankSeparator, prio: 9998
  variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
|
|
|
|
# Paragraph of a document (paragraphs are separated by 2 newlines); atomic,
# so parsing stores the text as content.
class Paragraph < InlineFormattable
  atomic
end
|
|
|
|
# Level 1 heading; base class of the other heading levels.
class Heading1 < InlineFormattable
end
|
|
|
|
# Level 2 heading.
class Heading2 < Heading1
end
|
|
|
|
# Level 3 heading.
class Heading3 < Heading1
end
|
|
|
|
# Level 4 heading.
class Heading4 < Heading1
end
|
|
|
|
# Level 5 heading.
class Heading5 < Heading1
end
|
|
|
|
# Level 6 heading.
class Heading6 < Heading1
end
|
|
|
|
# Preformatted block of code.
class CodeBlock < DOMObject
end
|
|
|
|
# Quote block; inherits the parsing configuration of Document.
class QuoteBlock < Document
end
|
|
|
|
# Table block.
class TableBlock < DOMObject
end
|
|
|
|
# Element of a list; inherits the parsing configuration of Document.
class ListElement < Document
end
|
|
|
|
# Unordered list block.
class ULBlock < DOMObject
end
|
|
|
|
# Ordered list block.
class OLBlock < DOMObject
end
|
|
|
|
# Indented block.
class IndentBlock < DOMObject
end
|
|
|
|
# Horizontal rule; marked atomic.
class HorizontalRule < DOMObject
  atomic
end
|
|
end
|
|
end
|