home73k/lib/home73k/blog/post.ex

168 lines
5.2 KiB
Elixir

defmodule Home73k.Blog.Post do
@moduledoc """
Defines the blog post struct and the functions that parse a post file
into it.
"""

alias Home73k.Highlighter

# Every struct field is mandatory, so one key list drives both the
# enforcement and the struct definition.
@enforce_keys [:title, :id, :date, :author, :tags, :lede, :body, :corpus]
defstruct @enforce_keys

# Common words that reject_word?/1 drops, in addition to words shorter
# than three characters. Apostrophes are removed before words are checked,
# hence entries such as "youll" and "isnt".
@strip_words ~w(
  the and are for not but had has was all any too one you his her can
  that with have this will your from they want been much some very them
  into which then now get its youll youre isnt wasnt
)
@doc """
Reads the post file at `post_path` and parses it into a `%Post{}`.

The file content is passed through a series of private stages: it is
split into raw frontmatter and markdown, the frontmatter is evaluated,
the lede and body are rendered to html, a search corpus is built, and
finally the struct is created.

Returns the post struct, or `nil`. Despite the bang in the name, a failed
file read is not raised: it becomes `nil`, and every later stage passes a
`nil` it receives straight through.
"""
@spec parse!(Path.t()) :: %__MODULE__{} | nil
def parse!(post_path) do
  post_path
  |> File.read()
  |> split_raw_file_data()
  |> parse_frontmatter()
  |> parse_lede()
  |> parse_body()
  |> build_corpus()
  |> build_post()
end
# """ split_raw_file_data/1
# Takes the result of File.read/1. On {:ok, file_data} the content is cut
# at the first "---" separator (at most two parts, empty parts trimmed),
# which normally yields [raw_frontmatter, markdown]. Anything else, such
# as a read error, yields nil.
# """
defp split_raw_file_data({:ok, contents}),
  do: String.split(contents, "---", parts: 2, trim: true)

defp split_raw_file_data(_not_ok), do: nil
# """ parse_frontmatter/1
# If we receive [raw_frontmatter, markdown], we evaluate the frontmatter.
# When it evaluates to a map we make sure it has an id and return
# {frontmatter_map, trimmed_markdown}.
# Everything else returns nil so the rest of the pipeline can pass it
# through: a failed read (nil), a file without a "---" separator (a one
# element list), an empty file ([]), frontmatter that fails to evaluate,
# or frontmatter that evaluates to something other than a map.
# """
defp parse_frontmatter([fm, md]) do
  case parse_frontmatter_string(fm) do
    {%{} = parsed_fm, _binding} -> {set_post_id(parsed_fm), String.trim(md)}
    # covers {:error, _} as well as any non-map evaluation result
    _ -> nil
  end
end

# Any other shape means there is no usable frontmatter/markdown pair.
defp parse_frontmatter(_), do: nil
# """ parse_lede/1
# Determines the post lede (excerpt/summary) as html.
# When the frontmatter already carries a :lede, it is trimmed and rendered
# with Earmark, and the markdown is returned untouched.
# Otherwise the markdown is split on the first <!--more--> marker and
# extract_lede/1 decides whether there is a lede (nil if not). Any
# further <!--more--> markers in the body are replaced by a space.
# Anything that is not a {frontmatter, markdown} tuple returns nil.
# """
defp parse_lede({%{lede: raw_lede} = fm, md}) do
  lede_html =
    raw_lede
    |> String.trim()
    |> Earmark.as_html!()

  {%{fm | lede: lede_html}, md}
end

defp parse_lede({fm, md}) do
  {lede_html, rest_md} =
    md
    |> String.split("<!--more-->", parts: 2)
    |> extract_lede()

  body_md = String.replace(rest_md, "<!--more-->", " ")
  {Map.put(fm, :lede, lede_html), body_md}
end

defp parse_lede(_), do: nil
# """ parse_body/1
# Renders the body markdown to html with Earmark, runs the result through
# Highlighter.highlight_all/1 and stores it under :body.
# From this stage on only the post data map is passed along; the markdown
# is dropped. Anything that is not a {frontmatter, markdown} tuple
# returns nil.
# """
defp parse_body({fm, md}) do
  body_html =
    md
    |> Earmark.as_html!()
    |> Highlighter.highlight_all()

  Map.put(fm, :body, body_html)
end

defp parse_body(_), do: nil
# """ build_corpus/1
# Builds a searchable word list for the post, for live searching, and
# stores it under :corpus as a space-delimited string.
# Requires :title, :lede, :body and :tags in the post data; anything else
# returns nil.
# """
defp build_corpus(%{title: title, lede: lede, body: body, tags: tags} = post_data) do
  # these characters separate words, so they become spaces instead of
  # being deleted by the regex below
  separators = ["\n", "/", "\\", "(", ")", ":", "=", "_", ".", ",", "[", "]"]

  # tags, title, lede (may be nil) and body combined into one html string
  source_html = Enum.join(tags ++ [title, lede || " ", body], " ")

  # keep the text only, dropping the markup
  plain_text = source_html |> Floki.parse_fragment!() |> Floki.text()

  spaced_text = plain_text |> String.downcase() |> String.replace(separators, " ")

  # restrict to letters, digits and spaces, split into words, reject short
  # and common words, then keep the first occurrence of each word
  words =
    ~r/[^a-z0-9 ]/
    |> Regex.replace(spaced_text, "")
    |> String.split(" ", trim: true)
    |> Enum.reject(&reject_word?/1)
    |> Enum.uniq()

  Map.put(post_data, :corpus, Enum.join(words, " "))
end

defp build_corpus(_), do: nil
# """ build_post/1
# Turns the post data map into a post struct. struct!/2 raises when a key
# is unknown or an enforced key is missing. Non-map input returns nil.
# """
defp build_post(post_data) when is_map(post_data), do: struct!(__MODULE__, post_data)
defp build_post(_), do: nil
######################################################################
# HELPERS
######################################################################
# """ parse_frontmatter_string/1
# The raw frontmatter is expected to be Elixir source that evaluates to a
# map. It is evaluated with Code.eval_string/1, which returns
# {value, binding}. Any exception raised along the way is rescued and
# reported as {:error, nil}.
# NOTE(review): this executes whatever code the post file contains, so it
# must only ever be pointed at trusted files.
# """
defp parse_frontmatter_string(fm) do
  Code.eval_string(fm)
rescue
  _ -> {:error, nil}
end
# """ extract_lede/1
# Receives the markdown after it was split on <!--more-->.
# One part: there is no lede, so nil is returned with the body.
# Two parts: the first is the lede, which is rendered to html and run
# through Highlighter.highlight_code_blocks/1, then returned with the
# remaining body markdown (leading whitespace trimmed).
# """
defp extract_lede([body]), do: {nil, body}

defp extract_lede([lede_md, body_md]) do
  lede_html =
    lede_md
    |> String.trim_trailing()
    |> Earmark.as_html!()
    |> Highlighter.highlight_code_blocks()

  {lede_html, String.trim_leading(body_md)}
end
# """ set_post_id/1
# Frontmatter that already has an :id key is returned as is. Otherwise
# the id is derived from the title with parse_title_to_id/1 and added.
# """
defp set_post_id(%{id: _existing_id} = frontmatter), do: frontmatter

defp set_post_id(%{title: title} = frontmatter),
  do: Map.put(frontmatter, :id, parse_title_to_id(title))
@doc """
Converts a post title into an id cleansed for use in a URI request path.

Any markup in the title is dropped, the text is downcased, and every
character other than `a-z`, `0-9` and space is removed. The remaining
words are filtered (words shorter than three characters and common words
are rejected) and joined with `-`.

For example, `"The Quick Brown Fox"` becomes `"quick-brown-fox"`.

Note that the result is an empty string when every word of the title is
rejected.
"""
@spec parse_title_to_id(String.t()) :: String.t()
def parse_title_to_id(title) do
  title_text =
    title
    |> Floki.parse_fragment!()
    |> Floki.text()
    |> String.downcase()

  ~r/[^a-z0-9 ]/
  |> Regex.replace(title_text, "")
  |> String.split(" ", trim: true)
  |> Enum.reject(&reject_word?/1)
  |> Enum.join("-")
end
# """ reject_word?/1
# True when a word should be dropped: it is shorter than three
# characters, or it is listed in @strip_words.
# Used by parse_title_to_id/1 and build_corpus/1.
# """
defp reject_word?(word) do
  too_short? = String.length(word) < 3
  too_short? or word in @strip_words
end
end