Files
hiscaler-gox/htmlx/html.go
scheibling b4eb50ab55
Some checks failed
Go / build (push) Failing after 7s
Created
2025-04-08 19:16:39 +02:00

187 lines
4.2 KiB
Go

package htmlx
import (
"regexp"
"sort"
"strings"
"unicode/utf8"
"git.cloudyne.io/go/hiscaler-gox/stringx"
)
var (
// Strip regexp
rxStrip = regexp.MustCompile(`(?s)<sty(.*)/style>|<scr(.*)/script>|<link(.*)/>|<meta(.*)/>|<!--(.*)-->| style=['"]+(.*)['"]+`)
// Spaceless regexp
rxSpaceless = regexp.MustCompile(`/>\s+</`)
// Clean regexp
rxCleanCSS = regexp.MustCompile(`(?s)<sty(.*)/style>|<link(.*)/>| style=['"]+(.*)['"]+`)
rxCleanJavascript = regexp.MustCompile(`(?s)<script(.*)/script>`)
rxCleanComment = regexp.MustCompile(`(?s)<!--(.*)-->`)
rxCleanMeta = regexp.MustCompile(`(?s)<meta(.*)/>`)
)
type CleanMode uint32
const (
CleanModeCSS CleanMode = 1 << (10 - iota) // 包括元素内嵌样式
CleanModeJavascript
CleanModeComment
CleanModeMeta
CleanModeSpace
CleanModeAll = CleanModeCSS | CleanModeJavascript | CleanModeComment | CleanModeMeta | CleanModeSpace
)
// Strip Clean html tags
// https://stackoverflow.com/questions/55036156/how-to-replace-all-html-tag-with-empty-string-in-golang
func Strip(html string) string {
html = strings.TrimSpace(html)
if html != "" {
html = rxStrip.ReplaceAllString(html, "")
}
if html == "" {
return ""
}
const (
htmlTagStart = 60 // Unicode `<`
htmlTagEnd = 62 // Unicode `>`
)
// Setup a string builder and allocate enough memory for the new string.
var builder strings.Builder
builder.Grow(len(html) + utf8.UTFMax)
in := false // True if we are inside an HTML tag.
start := 0 // The index of the previous start tag character `<`
end := 0 // The index of the previous end tag character `>`
for i, c := range html {
// If this is the last character and we are not in an HTML tag, save it.
if (i+1) == len(html) && end >= start && c != htmlTagStart && c != htmlTagEnd {
builder.WriteString(html[end:])
}
// Keep going if the character is not `<` or `>`
if c != htmlTagStart && c != htmlTagEnd {
continue
}
if c == htmlTagStart {
// Only update the start if we are not in a tag.
// This make sure we strip out `<<br>` not just `<br>`
if !in {
start = i
}
in = true
// Write the valid string between the close and start of the two tags.
builder.WriteString(html[end:start])
continue
}
// else c == htmlTagEnd
in = false
end = i + 1
}
s := builder.String()
if s != "" {
s = strings.TrimSpace(Spaceless(s))
}
return s
}
// Spaceless 移除多余的空格
func Spaceless(html string) string {
html = stringx.RemoveExtraSpace(html)
if html == "" {
return ""
}
return rxSpaceless.ReplaceAllString(html, "><")
}
func Clean(html string, cleanMode CleanMode) string {
if html == "" {
return html
}
const n = 5
modes := [n]bool{} // css, javascript, comment, meta, space, all
for i := 0; i < n; i++ {
if cleanMode&(1<<uint(10-i)) != 0 {
modes[i] = true
}
}
if modes[n-1] {
html = Spaceless(rxStrip.ReplaceAllString(html, ""))
} else {
for i := 0; i < n-2; i++ {
if modes[i] {
switch i {
case 0:
html = rxCleanCSS.ReplaceAllString(html, "")
case 1:
html = rxCleanJavascript.ReplaceAllString(html, "")
case 2:
html = rxCleanComment.ReplaceAllString(html, "")
case 3:
html = rxCleanMeta.ReplaceAllString(html, "")
case 4:
html = Spaceless(html)
}
}
}
}
return html
}
func Tag(tag, content string, attributes, styles map[string]string) string {
var sb strings.Builder
sb.Grow(len(tag)*2 + len(content) + 5)
sb.WriteString("<")
sb.WriteString(tag)
fnSortedKeys := func(d map[string]string) []string {
n := len(d)
if n == 0 {
return nil
}
keys := make([]string, n)
i := 0
for k := range d {
keys[i] = k
i++
}
if i > 1 {
sort.Strings(keys)
}
return keys
}
for _, k := range fnSortedKeys(attributes) {
sb.WriteString(" ")
sb.WriteString(k)
sb.WriteString(`="`)
sb.WriteString(attributes[k])
sb.WriteString(`"`)
}
keys := fnSortedKeys(styles)
if len(keys) > 0 {
sb.WriteString(` style="`)
for _, k := range keys {
sb.WriteString(k)
sb.WriteString(":")
sb.WriteString(styles[k])
sb.WriteString(`;`)
}
sb.WriteString(`"`)
}
sb.WriteString(">")
sb.WriteString(content)
sb.WriteString("</")
sb.WriteString(tag)
sb.WriteString(">")
return sb.String()
}