rubymark/lib/rbmark.rb

635 lines
16 KiB
Ruby

# frozen_string_literal: true
module RBMark
# Module for representing parsing-related constructs
module Parsing
# Base class for scanners. It only stores the list of variants and declares
# the scanning entry point; concrete scanners provide the actual behaviour.
class Scanner
  # @return [Array] variants registered with this scanner
  attr_accessor :variants

  # Create a scanner with an empty variant list.
  def initialize
    @variants = []
  end

  # Scan text (abstract: concrete scanners override this method).
  # @param _text [String]
  # @return [Array<RBMark::DOM::DOMObject>]
  # @raise [StandardError] always, when called on the abstract class
  def scan(_text)
    raise(StandardError, "Abstract method called")
  end
end
# Line-level scanner for blocks.
#
# Walks the text one line at a time. While no block is open, the current line
# is offered to the registered variants in ascending priority order and the
# first variant whose +begin?+ accepts it becomes the current mode. Lines
# accumulate in the buffer until the mode's +end?+ reports that the block is
# complete; the buffer is then flushed into DOM objects.
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  # @param text [String]
  # @param buffer [String] text already accumulated for the open block
  # @param blocks [Array<RBMark::DOM::DOMObject>] blocks produced so far
  # @param mode [::RBMark::Parsing::BlockVariant, nil] variant of the block
  #   that is already open, if any
  # @return [Array<RBMark::DOM::DOMObject>]
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    lines = text.lines
    lines.each_with_index do |line, index|
      buffer += line
      ahead = lines.fetch(index + 1, nil)
      blocks, buffer, mode = try_begin(line, blocks, buffer, mode,
                                       lookahead: ahead)
      if mode&.end?(line, lookahead: ahead, blocks: blocks, buffer: buffer)
        blocks, buffer, mode = flush(blocks, buffer, mode)
      end
    end
    # Whatever is still buffered at end of input forms the last block.
    flush(blocks, buffer, mode).first
  end

  # Predict mode for given line
  # @param line [String]
  # @param message [Hash] extra keywords forwarded to each variant's +begin?+
  # @return [::RBMark::Parsing::BlockVariant, nil] first variant (in priority
  #   order) that accepts the line, or nil when none does
  def select_mode(line, **message)
    @variants.find do |variant|
      variant[0].begin?(line, **message)
    end&.at(0)
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block
  # @param line [String] line that was just appended to the buffer
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [::RBMark::Parsing::BlockVariant, nil]
  # @param lookahead [String, nil] next line over
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::BlockVariant)]
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    return blocks, buffer, mode if mode

    mode = select_mode(line, lookahead: lookahead,
                             blocks: blocks,
                             buffer: buffer)
    # A line that no variant claims belongs to no block. Take it back out of
    # the buffer, otherwise it would be handed to the next block that opens
    # (e.g. a blank line would become the "first line" of a fenced block and
    # make its opening fence look like the closing one).
    return [blocks, buffer.delete_suffix(line), nil] unless mode

    blocks.append(mode.begin(line)) if mode.respond_to?(:begin)
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @return [void]
  # @raise [StandardError] if a registered variant is not a BlockVariant
  def prepare
    @variants.each do |variant|
      unless variant[0].is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{variant} is not a BlockVariant"
      end

      variant[0].parent = self
    end
    # sort_by! alone is not stable; the index tie-breaker keeps variants of
    # equal priority in their registration order.
    @variants.sort_by!.with_index { |variant, index| [variant.last, index] }
  end

  # Flush the buffer using given mode.
  # Calls, when the mode defines them, :end (finalize the block opened by
  # :begin), :flush (build a new block from the buffer) and :restructure
  # (which then decides the next buffer and mode itself).
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    return [blocks, "", nil] if buffer == ""

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush)
    if mode.respond_to?(:restructure)
      blocks, buffer, mode = mode.restructure(blocks, buffer, mode)
    else
      buffer = ""
      mode = nil
    end
    [blocks, buffer, mode]
  end
end
# Common ancestor of every variant; it defines no behaviour of its own.
class Variant; end
# Abstract block-level variant.
#
# Subclasses must implement {#begin?} and {#end?}. They may additionally
# define any of the optional hooks documented below; the scanner only calls
# a hook when the variant responds to it.
class BlockVariant < Variant
  # @return [Object] owner of this variant (the line scanner assigns itself
  #   here before scanning)
  attr_accessor :parent

  # Check if a block begins on this line
  # @param _line [String]
  # @param _opts [Hash] options hash
  # @option _opts [String, nil] :lookahead next line over
  # @option _opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option _opts [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def begin?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end

  # Check if a block ends on this line
  # @param _line [String]
  # @param _opts [Hash] options hash
  # @option _opts [String, nil] :lookahead next line over
  # @option _opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option _opts [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def end?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end

  # @!method begin(buffer)
  #   Open a block to be later filled in by BlockVariant#end
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method end(block, buffer)
  #   Finalize a block opened by begin
  #   @param block [::RBMark::DOM::DOMObject]
  #   @param buffer [String]
  #   @return [void]

  # @!method flush(buffer)
  #   Flush buffer and create a new DOM object
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method restructure(blocks, buffer, mode)
  #   Restructure current set of blocks (if method is defined)
  #   @param blocks [Array<::RBMark::DOM::DOMObject>]
  #   @param buffer [String]
  #   @param mode [::RBMark::Parsing::Variant]
  #   @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
end
# Paragraph breaking variant: a block variant that can terminate a paragraph
# and decide which DOM class the finished paragraph becomes.
class BreakerVariant < BlockVariant
  # Check that a paragraph matches the breaker
  # @param _buffer [String]
  # @return [Class, nil]
  # @raise [StandardError] unless overridden
  def match(_buffer)
    raise(StandardError, "Abstract method called")
  end

  # @!method preprocess(buffer)
  #   Preprocess buffer (optional hook, used only when defined)
  #   @param buffer [String]
  #   @return [String]
end
# Paragraph variant: collects consecutive text lines and lets the registered
# breaker variants decide where the paragraph stops and what it turns into.
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  # Any line holding a non-whitespace character can open a paragraph.
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  def end?(line, lookahead: nil, **_opts)
    # The current line itself is a breaker, or there is nothing left to read.
    return true if check_paragraph_breakers(line)
    return true unless lookahead

    # A breaker on the next line is swallowed into this paragraph first, and
    # a line indented by four spaces keeps the paragraph going.
    return false if check_paragraph_breakers(lookahead)
    return false if lookahead.match?(/^ {4}/)

    # Otherwise the paragraph ends when the next line would open any other
    # kind of block (or no block at all).
    upcoming = parent.select_mode(lookahead)
    !upcoming.is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # @sg-ignore
  def flush(buffer)
    dom_class = nil
    breaker = nil
    # Ask breakers in scanner order; the first one returning a DOM class wins.
    parent.variants.each do |entry|
      candidate = entry[0]
      next unless candidate.is_a?(::RBMark::Parsing::BreakerVariant)

      dom_class = candidate.match(buffer)
      next unless dom_class

      breaker = candidate
      break
    end
    buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess)
    target = dom_class || ::RBMark::DOM::Paragraph
    target.parse(buffer.strip)
  end

  private

  # Tell whether any registered breaker variant accepts the line as a
  # paragraph break.
  # @param line [String]
  # @return [Boolean]
  def check_paragraph_breakers(line)
    parent.variants.any? do |entry|
      entry[0].is_a?(::RBMark::Parsing::BreakerVariant) &&
        entry[0].begin?(line, breaks_paragraph: true)
    end
  end
end
# Thematic break variant: a single line made of one kind of marker
# (-, _ or *) and spaces.
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    # The line consists solely of one marker kind mixed with spaces ...
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    # ... the first marker is preceded by at most three spaces ...
    return false unless line.match?(/^ {0,3}[-_*]/)

    # ... and at least three markers are present.
    ["-", "_", "*"].any? { |marker| line.count(marker) >= 3 }
  end

  # (see BlockVariant#end?)
  # A thematic break never spans more than its opening line.
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
# ATX Heading variant: a single line opened by one to six # characters.
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  # Accepts up to three leading spaces, 1-6 # characters and then either
  # nothing or a space followed by the heading text.
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  # A heading never spans more than its opening line.
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    lvl, content = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/)[1..2]
    # Drop the optional closing sequence: a space-separated run of # at the
    # end of the line, possibly followed by trailing spaces
    # ("# foo ##  " yields "foo").
    content = content.sub(/(?: #+)? *\z/, "")
    heading(lvl).parse(content.strip)
  end

  private

  # Pick the heading class for the given opening sequence
  # @param lvl [String] the run of # characters that opened the heading
  # @return [Class, nil]
  def heading(lvl)
    case lvl.length
    when 1 then ::RBMark::DOM::Heading1
    when 2 then ::RBMark::DOM::Heading2
    when 3 then ::RBMark::DOM::Heading3
    when 4 then ::RBMark::DOM::Heading4
    when 5 then ::RBMark::DOM::Heading5
    when 6 then ::RBMark::DOM::Heading6
    end
  end
end
# Paragraph closing variant: a line holding at most three spaces ends the
# paragraph without changing what the paragraph becomes.
class BlankSeparator < BreakerVariant
  # (see BlockVariant#begin?)
  # Only answers when asked in the role of a paragraph breaker.
  def begin?(line, breaks_paragraph: nil, **_opts)
    return breaks_paragraph unless breaks_paragraph

    line.match?(/^ {0,3}$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # Never selects a DOM class.
  def match(_buffer)
    nil
  end
end
# Setext heading variant: a paragraph underlined by a run of = or - becomes
# a level 1 or level 2 heading respectively.
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  # Only answers when asked in the role of a paragraph breaker.
  def begin?(line, breaks_paragraph: nil, **_opts)
    return breaks_paragraph unless breaks_paragraph

    line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # An underline with no text above it is not a heading.
  def match(buffer)
    title = preprocess(buffer)
    title.match?(/\S/) ? heading(buffer.lines.last) : nil
  end

  # (see BreakerVariant#preprocess)
  # Removes the last line (the underline) from the buffer.
  def preprocess(buffer)
    buffer.lines[0...-1].join
  end

  private

  # Map an underline to the heading class it denotes
  # @param underline [String]
  # @return [Class, nil]
  def heading(underline)
    case underline
    when /^ {0,3}=+ *$/ then ::RBMark::DOM::Heading1
    when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2
    end
  end
end
# Indented literal block variant: lines indented by four spaces or a tab.
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  # The block continues while the next line is indented (four spaces or a
  # tab, with or without content after it) or is whitespace-only.
  def end?(_line, lookahead: nil, **_opts)
    !lookahead&.match?(/^(?: {4}.*|\t.*|\s*)$/)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    text = buffer.lines.map { |x| "#{dedent(x.chomp)}\n" }.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end

  private

  # Remove one level of indentation from a line
  # @param line [String] line without its trailing newline
  # @return [String]
  def dedent(line)
    # A tab is a single character of indentation; cutting four characters
    # would eat into the content.
    return line[1..] if line.start_with?("\t")

    # Lines shorter than the indentation (blank lines) become empty.
    line[4..].to_s
  end
end
# Fenced code block: content enclosed between a ``` or ~~~ fence and a
# matching closing fence.
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  # The opening line alone can never close the block, hence the requirement
  # that the buffer holds more than one line.
  def end?(line, blocks: nil, buffer: nil, **_opts)
    buffer.lines.length > 1 &&
      closing_fence?(line, blocks.last.properties[:expected_closer])
  end

  # (see BlockVariant#begin)
  # Records the fence that was used (so the closing fence can be recognised)
  # and the info string that follows it.
  def begin(buffer)
    opener = buffer.match(/^(`{3,}|~{3,})(.*)$/)
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = opener[1]
    block.properties[:infostring] = opener[2].strip
    block
  end

  # (see BlockVariant#end)
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lines.drop(1) # everything after the opening fence
    # Only drop the last line when it really is the closing fence; a block
    # left open at the end of input keeps all of its content.
    body.pop if body.last && closing_fence?(body.last, closer)
    block.content = body.join
  end

  private

  # Check whether a line closes a block opened with the given fence: the
  # same fence character, repeated at least as many times, and nothing else.
  # @param line [String]
  # @param closer [String, nil] fence recorded when the block was opened
  # @return [Boolean]
  def closing_fence?(line, closer)
    return false unless closer

    fence = line.strip
    fence.start_with?(closer) && fence.delete(closer[0]).empty?
  end
end
end
# Module for representing abstract object hierarchy
module DOM
# Abstract container.
#
# At class level it holds the parsing configuration (scanner class, variants
# and the atomic flag), which subclasses inherit at the moment they are
# defined. Instances hold text content, child objects and a property hash.
class DOMObject
  class << self
    # @return [Array<Array(Class, Integer)>, nil] registered [variant class, priority] pairs
    # @return [Class, nil] scanner class used to parse text
    attr_reader :variants, :scanner_class
    attr_accessor :alt_for, :atomic_mode

    # Replace the variant list; the cached scanner is rebuilt on next parse.
    # @param list [Array<Array(Class, Integer)>, nil]
    def variants=(list)
      @prepared = false
      @variants = list
    end

    # Replace the scanner class; the cached scanner is rebuilt on next parse.
    # @param cls [Class, nil]
    def scanner_class=(cls)
      @prepared = false
      @scanner_class = cls
    end

    # Hook for initializing variables: remembers the subclass and hands it a
    # copy of the current configuration.
    # @param subclass [Class]
    def inherited(subclass)
      super
      @subclasses ||= []
      @subclasses.append(subclass)
      subclass.variants = @variants ? @variants.dup : []
      subclass.atomic_mode = @atomic_mode
      subclass.scanner_class = @scanner_class
    end

    # Add potential sub-element variant, here and in all known subclasses
    # @param cls [Class] Variant subclass
    # @param prio [Integer] priority attached to the variant
    # @raise [StandardError] if cls is not a Variant subclass
    def variant(cls, prio: 1)
      unless cls < ::RBMark::Parsing::Variant
        raise StandardError, "#{cls} is not a Variant subclass"
      end

      @variants ||= []
      @variants.append([cls, prio])
      @prepared = false
      @subclasses&.each do |subclass|
        # prio is a keyword parameter and has to be forwarded as one.
        subclass.variant(cls, prio: prio)
      end
    end

    # Set scanner class, here and in all known subclasses
    # @param cls [Class] Scanner subclass
    # @raise [StandardError] if cls is not a Scanner subclass
    def scanner(cls)
      unless cls < ::RBMark::Parsing::Scanner
        raise StandardError, "#{cls} is not a Scanner subclass"
      end

      @scanner_class = cls
      @prepared = false
      @subclasses&.each do |subclass|
        subclass.scanner(cls)
      end
    end

    # Prepare scanner and variants. The scanner is built once and reused
    # until the scanner class or the variant list changes.
    # @return [void]
    def prepare
      return if @prepared

      @scanner = @scanner_class.new
      @scanner.variants = @variants.map { |x| [x[0].new, x[1]] }
      @prepared = true
    end

    # Parse text from the given context. Atomic classes store the text as
    # content; all others scan it and append the resulting objects as children.
    # @param text [String]
    # @return [self]
    def parse(text)
      prepare unless @atomic_mode
      instance = new
      if @atomic_mode
        instance.content = text
      else
        instance.append(*@scanner.scan(text))
      end
      instance
    end

    # Create a new instance of class or referenced class
    # @return [self, Class]
    def create
      if @alt_for
        @alt_for.new
      else
        new
      end
    end

    # Set the atomic flag
    # @return [void]
    def atomic
      @atomic_mode = true
    end
  end

  def initialize
    @content = nil
    @children = []
    @properties = {}
  end

  # @return [String, nil] text content
  # @return [Array<DOMObject>] child objects
  # @return [Hash] property hash
  attr_reader :content, :children, :properties

  # Set certain property in the properties hash
  # @param properties [Hash] properties to update
  def property(**properties)
    @properties.update(**properties)
  end

  # Add children to container
  # @param children [Array<DOMObject>]
  # @raise [StandardError] if any argument is not a DOMObject
  def append(*children)
    unless children.all? { |x| x.is_a? DOMObject }
      raise StandardError, "one of #{children.inspect} is not a DOMObject"
    end

    @children.append(*children)
  end

  # Insert a child into the container
  # @param index [Integer]
  # @param child [DOMObject]
  # @raise [StandardError] if child is not a DOMObject
  def insert(index, child)
    raise StandardError, "not a DOMObject" unless child.is_a? DOMObject

    @children.insert(index, child)
  end

  # Delete a child from container
  # @param index [Integer]
  def delete_at(index)
    @children.delete_at(index)
  end

  # Get a child from the container
  # @param key [Integer]
  def [](key)
    @children[key]
  end

  # Set text content of a DOMObject
  # @param text [String]
  # @raise [StandardError] if text is not a String
  def content=(text)
    raise StandardError, "not a String" unless text.is_a? String

    @content = text
  end
end
# Inline text
class Text < DOMObject; end

# Inline preformatted text
class InlinePre < DOMObject; end

# Inline formattable text. Marked atomic, so parsing stores the text as
# content; classes derived from it inherit the flag.
class InlineFormattable < DOMObject
  atomic
end

# Bold text
class InlineBold < InlineFormattable; end

# Italics text
class InlineItalics < InlineFormattable; end

# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable; end

# Underline text
class InlineUnder < InlineFormattable; end

# Strikethrough text
class InlineStrike < InlineFormattable; end

# Hyperreferenced text
class InlineLink < InlineFormattable; end

# Image
class InlineImage < InlinePre; end

# Linebreak
class InlineBreak < DOMObject; end
# Document root.
# Registers the line scanner and the block variants used to split a document
# into blocks. The line scanner sorts variants by ascending priority and
# picks the first one that accepts a line, so the blank separator (9998) and
# the paragraph (9999) are consulted only after every variant registered with
# the default priority (1). Classes derived from Document inherit this
# configuration.
class Document < DOMObject
scanner ::RBMark::Parsing::LineScanner
variant ::RBMark::Parsing::ATXHeadingVariant
variant ::RBMark::Parsing::ThematicBreakVariant
variant ::RBMark::Parsing::SetextHeadingVariant
variant ::RBMark::Parsing::IndentedBlockVariant
variant ::RBMark::Parsing::FencedCodeBlock
# The two catch-all variants come last.
variant ::RBMark::Parsing::BlankSeparator, prio: 9998
variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
# Paragraph in a document (separated by 2 newlines)
class Paragraph < InlineFormattable
  atomic
end

# Heading level 1
class Heading1 < InlineFormattable; end

# Heading level 2
class Heading2 < Heading1; end

# Heading level 3
class Heading3 < Heading1; end

# Heading level 4
class Heading4 < Heading1; end

# Heading level 5
class Heading5 < Heading1; end

# Heading level 6
class Heading6 < Heading1; end
# Preformatted code block
class CodeBlock < DOMObject; end

# Quote block (parsed with the same configuration as a document)
class QuoteBlock < Document; end

# Table
class TableBlock < DOMObject; end

# List element (parsed with the same configuration as a document)
class ListElement < Document; end

# Unordered list
class ULBlock < DOMObject; end

# Ordered list block
class OLBlock < DOMObject; end

# Indent block
class IndentBlock < DOMObject; end

# Horizontal rule
class HorizontalRule < DOMObject
  atomic
end
end
end