updated links formatting in the autogenerated html->text mail body

This commit is contained in:
Gani Georgiev 2023-11-26 14:43:24 +02:00
parent 821aae4a62
commit 531a7abec9
2 changed files with 37 additions and 24 deletions

View File

@ -2,7 +2,7 @@
- Bumped the minimum required Go version to 1.21.0 in order to integrate with the builtin `slog` package. - Bumped the minimum required Go version to 1.21.0 in order to integrate with the builtin `slog` package.
- removed _requests table in favor of _logs - removed _requests_ table in favor of logs
- Renamed: - Renamed:
``` ```
@ -29,6 +29,8 @@
- Soft-deprecated and renamed `app.Cache()` with `app.Store()`. - Soft-deprecated and renamed `app.Cache()` with `app.Store()`.
- Updated links formatting in the autogenerated html->text mail body.
## v0.20.0-rc3 ## v0.20.0-rc3

View File

@ -10,6 +10,16 @@ import (
var whitespaceRegex = regexp.MustCompile(`\s+`) var whitespaceRegex = regexp.MustCompile(`\s+`)
// tagsToSkip lists the element names whose child subtrees html2Text does
// not descend into, so nothing nested inside them reaches the text output.
var tagsToSkip = []string{
"style", "script", "iframe", "applet", "object", "svg", "img",
"button", "form", "textarea", "input", "select", "option", "template",
}
// inlineTags lists the element names that html2Text treats as inline:
// encountering one of them does not, by itself, start a new line in the
// text output (any other non-<br> element does, when a new line is allowed).
var inlineTags = []string{
"a", "span", "small", "strike", "strong",
"sub", "sup", "em", "b", "u", "i",
}
// Very rudimentary auto HTML to Text mail body converter. // Very rudimentary auto HTML to Text mail body converter.
// //
// Caveats: // Caveats:
@ -20,32 +30,24 @@ var whitespaceRegex = regexp.MustCompile(`\s+`)
// - Trailing spaces are preserved. // - Trailing spaces are preserved.
// - Multiple consecutive newlines are collapsed as one unless multiple <br> tags are used. // - Multiple consecutive newlines are collapsed as one unless multiple <br> tags are used.
func html2Text(htmlDocument string) (string, error) { func html2Text(htmlDocument string) (string, error) {
var builder strings.Builder
doc, err := html.Parse(strings.NewReader(htmlDocument)) doc, err := html.Parse(strings.NewReader(htmlDocument))
if err != nil { if err != nil {
return "", err return "", err
} }
tagsToSkip := []string{ var builder strings.Builder
"style", "script", "iframe", "applet", "object", "svg", "img",
"button", "form", "textarea", "input", "select", "option", "template",
}
inlineTags := []string{
"a", "span", "small", "strike", "strong",
"sub", "sup", "em", "b", "u", "i",
}
var canAddNewLine bool var canAddNewLine bool
// see https://pkg.go.dev/golang.org/x/net/html#Parse // see https://pkg.go.dev/golang.org/x/net/html#Parse
var f func(*html.Node) var f func(*html.Node, *strings.Builder)
f = func(n *html.Node) { f = func(n *html.Node, activeBuilder *strings.Builder) {
// start link wrapping for producing "[text](link)" formatted string
isLink := n.Type == html.ElementNode && n.Data == "a" isLink := n.Type == html.ElementNode && n.Data == "a"
if isLink { if isLink {
builder.WriteString("[") var linkBuilder strings.Builder
activeBuilder = &linkBuilder
} else if activeBuilder == nil {
activeBuilder = &builder
} }
switch n.Type { switch n.Type {
@ -58,34 +60,42 @@ func html2Text(htmlDocument string) (string, error) {
} }
if txt != "" { if txt != "" {
builder.WriteString(txt) activeBuilder.WriteString(txt)
canAddNewLine = true canAddNewLine = true
} }
case html.ElementNode: case html.ElementNode:
if n.Data == "br" { if n.Data == "br" {
// always write new lines when <br> tag is used // always write new lines when <br> tag is used
builder.WriteString("\r\n") activeBuilder.WriteString("\r\n")
canAddNewLine = false canAddNewLine = false
} else if canAddNewLine && !list.ExistInSlice(n.Data, inlineTags) { } else if canAddNewLine && !list.ExistInSlice(n.Data, inlineTags) {
builder.WriteString("\r\n") activeBuilder.WriteString("\r\n")
canAddNewLine = false canAddNewLine = false
} }
// prefix list items with dash // prefix list items with dash
if n.Data == "li" { if n.Data == "li" {
builder.WriteString("- ") activeBuilder.WriteString("- ")
} }
} }
for c := n.FirstChild; c != nil; c = c.NextSibling { for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, tagsToSkip) { if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, tagsToSkip) {
f(c) f(c, activeBuilder)
} }
} }
// end link wrapping // format links as [label](href)
if isLink { if isLink {
linkTxt := strings.TrimSpace(activeBuilder.String())
if linkTxt == "" {
linkTxt = "LINK"
}
builder.WriteString("[")
builder.WriteString(linkTxt)
builder.WriteString("]") builder.WriteString("]")
// link href attr extraction
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "href" { if a.Key == "href" {
if a.Val != "" { if a.Val != "" {
@ -96,10 +106,11 @@ func html2Text(htmlDocument string) (string, error) {
break break
} }
} }
activeBuilder.Reset()
} }
} }
f(doc) f(doc, &builder)
return strings.TrimSpace(builder.String()), nil return strings.TrimSpace(builder.String()), nil
} }