186
htmlx/html.go
Normal file
186
htmlx/html.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package htmlx
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.cloudyne.io/go/hiscaler-gox/stringx"
|
||||
)
|
||||
|
||||
var (
|
||||
// Strip regexp
|
||||
rxStrip = regexp.MustCompile(`(?s)<sty(.*)/style>|<scr(.*)/script>|<link(.*)/>|<meta(.*)/>|<!--(.*)-->| style=['"]+(.*)['"]+`)
|
||||
|
||||
// Spaceless regexp
|
||||
rxSpaceless = regexp.MustCompile(`/>\s+</`)
|
||||
|
||||
// Clean regexp
|
||||
rxCleanCSS = regexp.MustCompile(`(?s)<sty(.*)/style>|<link(.*)/>| style=['"]+(.*)['"]+`)
|
||||
rxCleanJavascript = regexp.MustCompile(`(?s)<script(.*)/script>`)
|
||||
rxCleanComment = regexp.MustCompile(`(?s)<!--(.*)-->`)
|
||||
rxCleanMeta = regexp.MustCompile(`(?s)<meta(.*)/>`)
|
||||
)
|
||||
|
||||
type CleanMode uint32
|
||||
|
||||
const (
|
||||
CleanModeCSS CleanMode = 1 << (10 - iota) // 包括元素内嵌样式
|
||||
CleanModeJavascript
|
||||
CleanModeComment
|
||||
CleanModeMeta
|
||||
CleanModeSpace
|
||||
CleanModeAll = CleanModeCSS | CleanModeJavascript | CleanModeComment | CleanModeMeta | CleanModeSpace
|
||||
)
|
||||
|
||||
// Strip Clean html tags
|
||||
// https://stackoverflow.com/questions/55036156/how-to-replace-all-html-tag-with-empty-string-in-golang
|
||||
func Strip(html string) string {
|
||||
html = strings.TrimSpace(html)
|
||||
if html != "" {
|
||||
html = rxStrip.ReplaceAllString(html, "")
|
||||
}
|
||||
if html == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
const (
|
||||
htmlTagStart = 60 // Unicode `<`
|
||||
htmlTagEnd = 62 // Unicode `>`
|
||||
)
|
||||
// Setup a string builder and allocate enough memory for the new string.
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(html) + utf8.UTFMax)
|
||||
|
||||
in := false // True if we are inside an HTML tag.
|
||||
start := 0 // The index of the previous start tag character `<`
|
||||
end := 0 // The index of the previous end tag character `>`
|
||||
|
||||
for i, c := range html {
|
||||
// If this is the last character and we are not in an HTML tag, save it.
|
||||
if (i+1) == len(html) && end >= start && c != htmlTagStart && c != htmlTagEnd {
|
||||
builder.WriteString(html[end:])
|
||||
}
|
||||
|
||||
// Keep going if the character is not `<` or `>`
|
||||
if c != htmlTagStart && c != htmlTagEnd {
|
||||
continue
|
||||
}
|
||||
|
||||
if c == htmlTagStart {
|
||||
// Only update the start if we are not in a tag.
|
||||
// This make sure we strip out `<<br>` not just `<br>`
|
||||
if !in {
|
||||
start = i
|
||||
}
|
||||
in = true
|
||||
|
||||
// Write the valid string between the close and start of the two tags.
|
||||
builder.WriteString(html[end:start])
|
||||
continue
|
||||
}
|
||||
// else c == htmlTagEnd
|
||||
in = false
|
||||
end = i + 1
|
||||
}
|
||||
s := builder.String()
|
||||
if s != "" {
|
||||
s = strings.TrimSpace(Spaceless(s))
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Spaceless 移除多余的空格
|
||||
func Spaceless(html string) string {
|
||||
html = stringx.RemoveExtraSpace(html)
|
||||
if html == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return rxSpaceless.ReplaceAllString(html, "><")
|
||||
}
|
||||
|
||||
func Clean(html string, cleanMode CleanMode) string {
|
||||
if html == "" {
|
||||
return html
|
||||
}
|
||||
const n = 5
|
||||
modes := [n]bool{} // css, javascript, comment, meta, space, all
|
||||
for i := 0; i < n; i++ {
|
||||
if cleanMode&(1<<uint(10-i)) != 0 {
|
||||
modes[i] = true
|
||||
}
|
||||
}
|
||||
if modes[n-1] {
|
||||
html = Spaceless(rxStrip.ReplaceAllString(html, ""))
|
||||
} else {
|
||||
for i := 0; i < n-2; i++ {
|
||||
if modes[i] {
|
||||
switch i {
|
||||
case 0:
|
||||
html = rxCleanCSS.ReplaceAllString(html, "")
|
||||
case 1:
|
||||
html = rxCleanJavascript.ReplaceAllString(html, "")
|
||||
case 2:
|
||||
html = rxCleanComment.ReplaceAllString(html, "")
|
||||
case 3:
|
||||
html = rxCleanMeta.ReplaceAllString(html, "")
|
||||
case 4:
|
||||
html = Spaceless(html)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return html
|
||||
}
|
||||
|
||||
func Tag(tag, content string, attributes, styles map[string]string) string {
|
||||
var sb strings.Builder
|
||||
sb.Grow(len(tag)*2 + len(content) + 5)
|
||||
sb.WriteString("<")
|
||||
sb.WriteString(tag)
|
||||
fnSortedKeys := func(d map[string]string) []string {
|
||||
n := len(d)
|
||||
if n == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
keys := make([]string, n)
|
||||
i := 0
|
||||
for k := range d {
|
||||
keys[i] = k
|
||||
i++
|
||||
}
|
||||
if i > 1 {
|
||||
sort.Strings(keys)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
for _, k := range fnSortedKeys(attributes) {
|
||||
sb.WriteString(" ")
|
||||
sb.WriteString(k)
|
||||
sb.WriteString(`="`)
|
||||
sb.WriteString(attributes[k])
|
||||
sb.WriteString(`"`)
|
||||
}
|
||||
|
||||
keys := fnSortedKeys(styles)
|
||||
if len(keys) > 0 {
|
||||
sb.WriteString(` style="`)
|
||||
for _, k := range keys {
|
||||
sb.WriteString(k)
|
||||
sb.WriteString(":")
|
||||
sb.WriteString(styles[k])
|
||||
sb.WriteString(`;`)
|
||||
}
|
||||
sb.WriteString(`"`)
|
||||
}
|
||||
sb.WriteString(">")
|
||||
sb.WriteString(content)
|
||||
sb.WriteString("</")
|
||||
sb.WriteString(tag)
|
||||
sb.WriteString(">")
|
||||
return sb.String()
|
||||
}
|
||||
Reference in New Issue
Block a user