Module supermark.parse
Expand source code
import re
from enum import Enum
from pathlib import Path
class ParserState(Enum):
MARKDOWN = 0
YAML = 1
CODE = 2
HTML = 3
AFTER_YAML = 4
AFTER_YAML_CONTENT = 5
ENV_PATTERN = re.compile("[a-zA-Z]*:")
def is_empty(s_line):
return not s_line
class RawChunk:
def __init__(self, lines, chunk_type, start_line_number, path, report):
self.lines = lines
self.type = chunk_type
self.start_line_number = start_line_number
self.path = path
self.parent_path = Path(path).parent.parent
self.report = report
# check if we only got empty lines
def all_empty(lines):
if len(lines) == 0:
return True
for line in lines:
if line.strip():
return False
return True
self._is_empty = all_empty(self.lines)
# remove blank lines from the beginning
while len(self.lines) > 0 and is_empty(self.lines[0].strip()):
self.lines.pop(0)
self.start_line_number = self.start_line_number + 1
self.tag = None
if len(self.lines) > 0:
if has_class_tag(self.lines[0]):
self.tag = self.lines[0].strip().split(":")[1].lower()
self.post_yaml = None
def get_tag(self):
return self.tag
def is_empty(self):
return self._is_empty
def get_type(self):
return self.type
def get_first_line(self):
if len(self.lines) == 0:
return "empty"
return self.lines[0]
def yaml_start(s_line):
return s_line == "---"
def yaml_stop(s_line):
return s_line == "---"
def has_class_tag(s_line):
return s_line.startswith(":") and ENV_PATTERN.match(s_line)
def markdown_start(s_line, empty_lines):
return (
has_class_tag(s_line)
or s_line.startswith("# ")
or empty_lines >= 2
or s_line.startswith("Aside:")
)
def html_start(s_line, empty_lines):
return s_line.startswith("<") and empty_lines >= 2
def html_stop(empty_lines):
return empty_lines >= 2
def code_start(s_line):
return s_line.startswith("```")
def code_stop(s_line):
return s_line.startswith("```")
def _parse(lines, path, report):
chunks = []
current_lines = []
empty_lines = 0
state = ParserState.MARKDOWN
start_line_number = 0
previous_yaml_chunk = None
for line_number, line in enumerate(lines, start=1):
s_line = line.strip()
if state == ParserState.MARKDOWN:
if is_empty(s_line):
empty_lines = empty_lines + 1
current_lines.append(line)
elif yaml_start(s_line):
chunks.append(
RawChunk(
current_lines,
ParserState.MARKDOWN,
start_line_number,
path,
report,
)
)
state = ParserState.YAML
current_lines = []
start_line_number = line_number
empty_lines = 0
elif code_start(s_line):
chunks.append(
RawChunk(
current_lines,
ParserState.MARKDOWN,
start_line_number,
path,
report,
)
)
state = ParserState.CODE
current_lines = [line]
start_line_number = line_number
empty_lines = 0
elif html_start(s_line, empty_lines):
chunks.append(
RawChunk(
current_lines,
ParserState.MARKDOWN,
start_line_number,
path,
report,
)
)
state = ParserState.HTML
current_lines = []
current_lines.append(line)
start_line_number = line_number
empty_lines = 0
elif markdown_start(s_line, empty_lines):
chunks.append(
RawChunk(
current_lines,
ParserState.MARKDOWN,
start_line_number,
path,
report,
)
)
state = ParserState.MARKDOWN
current_lines = []
current_lines.append(line)
start_line_number = line_number
empty_lines = 0
else:
current_lines.append(line)
empty_lines = 0
elif state == ParserState.YAML:
if yaml_stop(s_line):
previous_yaml_chunk = RawChunk(
current_lines, ParserState.YAML, start_line_number, path, report
)
chunks.append(previous_yaml_chunk)
state = ParserState.AFTER_YAML
current_lines = []
start_line_number = line_number + 1
else:
current_lines.append(line)
elif state == ParserState.AFTER_YAML:
if is_empty(s_line):
empty_lines = empty_lines + 1
current_lines.append(line)
state = ParserState.MARKDOWN
previous_yaml_chunk = None
else:
current_lines.append(line)
state = ParserState.AFTER_YAML_CONTENT
empty_lines = 0
elif state == ParserState.AFTER_YAML_CONTENT:
if is_empty(s_line):
empty_lines = empty_lines + 1
if empty_lines > 1:
previous_yaml_chunk.post_yaml = current_lines
state = ParserState.MARKDOWN
current_lines = []
else:
current_lines.append(line)
start_line_number = line_number + 1
else:
empty_lines = 0
current_lines.append(line)
elif state == ParserState.CODE:
if code_stop(s_line):
current_lines.append(line)
chunks.append(
RawChunk(
current_lines, ParserState.CODE, start_line_number, path, report
)
)
state = ParserState.MARKDOWN
current_lines = []
start_line_number = line_number + 1
else:
current_lines.append(line)
elif state == ParserState.HTML:
if is_empty(s_line):
empty_lines = empty_lines + 1
current_lines.append(line)
elif html_stop(empty_lines):
chunks.append(
RawChunk(
current_lines, ParserState.HTML, start_line_number, path, report
)
)
state = ParserState.MARKDOWN
current_lines = []
current_lines.append(line)
start_line_number = line_number
empty_lines = 0
else:
current_lines.append(line)
empty_lines = 0
# create last chunk
chunks.append(RawChunk(current_lines, state, start_line_number, path, report))
# remove chunks that turn out to be empty
chunks = [item for item in chunks if not item.is_empty()]
return chunks
Functions
def code_start(s_line)
-
Expand source code
def code_start(s_line): return s_line.startswith("```")
def code_stop(s_line)
-
Expand source code
def code_stop(s_line): return s_line.startswith("```")
def has_class_tag(s_line)
-
Expand source code
def has_class_tag(s_line): return s_line.startswith(":") and ENV_PATTERN.match(s_line)
def html_start(s_line, empty_lines)
-
Expand source code
def html_start(s_line, empty_lines): return s_line.startswith("<") and empty_lines >= 2
def html_stop(empty_lines)
-
Expand source code
def html_stop(empty_lines): return empty_lines >= 2
def is_empty(s_line)
-
Expand source code
def is_empty(s_line): return not s_line
def markdown_start(s_line, empty_lines)
-
Expand source code
def markdown_start(s_line, empty_lines): return ( has_class_tag(s_line) or s_line.startswith("# ") or empty_lines >= 2 or s_line.startswith("Aside:") )
def yaml_start(s_line)
-
Expand source code
def yaml_start(s_line): return s_line == "---"
def yaml_stop(s_line)
-
Expand source code
def yaml_stop(s_line): return s_line == "---"
Classes
class ParserState (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Expand source code
class ParserState(Enum): MARKDOWN = 0 YAML = 1 CODE = 2 HTML = 3 AFTER_YAML = 4 AFTER_YAML_CONTENT = 5
Ancestors
- enum.Enum
Class variables
var AFTER_YAML
var AFTER_YAML_CONTENT
var CODE
var HTML
var MARKDOWN
var YAML
class RawChunk (lines, chunk_type, start_line_number, path, report)
-
Expand source code
class RawChunk: def __init__(self, lines, chunk_type, start_line_number, path, report): self.lines = lines self.type = chunk_type self.start_line_number = start_line_number self.path = path self.parent_path = Path(path).parent.parent self.report = report # check if we only got empty lines def all_empty(lines): if len(lines) == 0: return True for line in lines: if line.strip(): return False return True self._is_empty = all_empty(self.lines) # remove blank lines from the beginning while len(self.lines) > 0 and is_empty(self.lines[0].strip()): self.lines.pop(0) self.start_line_number = self.start_line_number + 1 self.tag = None if len(self.lines) > 0: if has_class_tag(self.lines[0]): self.tag = self.lines[0].strip().split(":")[1].lower() self.post_yaml = None def get_tag(self): return self.tag def is_empty(self): return self._is_empty def get_type(self): return self.type def get_first_line(self): if len(self.lines) == 0: return "empty" return self.lines[0]
Methods
def get_first_line(self)
-
Expand source code
def get_first_line(self): if len(self.lines) == 0: return "empty" return self.lines[0]
def get_tag(self)
-
Expand source code
def get_tag(self): return self.tag
def get_type(self)
-
Expand source code
def get_type(self): return self.type
def is_empty(self)
-
Expand source code
def is_empty(self): return self._is_empty