186
htmlx/html.go
Normal file
186
htmlx/html.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package htmlx
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.cloudyne.io/go/hiscaler-gox/stringx"
|
||||
)
|
||||
|
||||
var (
	// rxStrip deletes, in a single pass, the markup that is discarded before
	// any tag handling: <style> and <script> elements together with their
	// bodies, <link> and <meta> tags, HTML comments, and inline style="..."
	// attributes.
	//
	// Every wildcard is either lazy or bounded by the tag's own '>' (or by
	// the attribute's own closing quote), so a match can never extend into a
	// later, unrelated element.
	rxStrip = regexp.MustCompile(`(?s)<style[\s>].*?</style>|<script[\s>].*?</script>|<link(?:[\s/][^>]*)?>|<meta(?:[\s/][^>]*)?>|<!--.*?-->|\s+style\s*=\s*(?:"[^"]*"|'[^']*')`)

	// rxSpaceless matches "/>", one or more whitespace characters, then "</".
	//
	// NOTE(review): the slashes are literal, so together with the "><"
	// replacement used by Spaceless the input `<br/> </p>` becomes `<br><p>`.
	// This looks like regex delimiters copied into the pattern (`>\s+<` was
	// presumably intended) — confirm before changing, since callers and
	// tests may rely on the current behaviour.
	rxSpaceless = regexp.MustCompile(`/>\s+</`)

	// rxCleanCSS matches <style> elements, <link> tags and inline style
	// attributes.
	rxCleanCSS = regexp.MustCompile(`(?s)<style[\s>].*?</style>|<link(?:[\s/][^>]*)?>|\s+style\s*=\s*(?:"[^"]*"|'[^']*')`)
	// rxCleanJavascript matches <script> elements including their bodies.
	rxCleanJavascript = regexp.MustCompile(`(?s)<script[\s>].*?</script>`)
	// rxCleanComment matches HTML comments, which may span several lines.
	rxCleanComment = regexp.MustCompile(`(?s)<!--.*?-->`)
	// rxCleanMeta matches <meta> tags, self-closed or not.
	rxCleanMeta = regexp.MustCompile(`<meta(?:[\s/][^>]*)?>`)
)
|
||||
|
||||
// CleanMode is a set of bit flags selecting which kinds of markup Clean
// removes. Flags may be combined with the bitwise OR operator.
type CleanMode uint32

const (
	// CleanModeCSS selects CSS, including styles embedded on elements.
	// It is the highest flag (1 << 10); each following flag is one bit lower.
	CleanModeCSS CleanMode = 1 << (10 - iota)
	// CleanModeJavascript selects JavaScript (1 << 9).
	CleanModeJavascript
	// CleanModeComment selects HTML comments (1 << 8).
	CleanModeComment
	// CleanModeMeta selects meta tags (1 << 7).
	CleanModeMeta
	// CleanModeSpace selects redundant whitespace (1 << 6).
	CleanModeSpace
	// CleanModeAll is the union of all flags above.
	CleanModeAll = CleanModeCSS | CleanModeJavascript | CleanModeComment | CleanModeMeta | CleanModeSpace
)
|
||||
|
||||
// Strip Clean html tags
|
||||
// https://stackoverflow.com/questions/55036156/how-to-replace-all-html-tag-with-empty-string-in-golang
|
||||
func Strip(html string) string {
|
||||
html = strings.TrimSpace(html)
|
||||
if html != "" {
|
||||
html = rxStrip.ReplaceAllString(html, "")
|
||||
}
|
||||
if html == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
const (
|
||||
htmlTagStart = 60 // Unicode `<`
|
||||
htmlTagEnd = 62 // Unicode `>`
|
||||
)
|
||||
// Setup a string builder and allocate enough memory for the new string.
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(html) + utf8.UTFMax)
|
||||
|
||||
in := false // True if we are inside an HTML tag.
|
||||
start := 0 // The index of the previous start tag character `<`
|
||||
end := 0 // The index of the previous end tag character `>`
|
||||
|
||||
for i, c := range html {
|
||||
// If this is the last character and we are not in an HTML tag, save it.
|
||||
if (i+1) == len(html) && end >= start && c != htmlTagStart && c != htmlTagEnd {
|
||||
builder.WriteString(html[end:])
|
||||
}
|
||||
|
||||
// Keep going if the character is not `<` or `>`
|
||||
if c != htmlTagStart && c != htmlTagEnd {
|
||||
continue
|
||||
}
|
||||
|
||||
if c == htmlTagStart {
|
||||
// Only update the start if we are not in a tag.
|
||||
// This make sure we strip out `<<br>` not just `<br>`
|
||||
if !in {
|
||||
start = i
|
||||
}
|
||||
in = true
|
||||
|
||||
// Write the valid string between the close and start of the two tags.
|
||||
builder.WriteString(html[end:start])
|
||||
continue
|
||||
}
|
||||
// else c == htmlTagEnd
|
||||
in = false
|
||||
end = i + 1
|
||||
}
|
||||
s := builder.String()
|
||||
if s != "" {
|
||||
s = strings.TrimSpace(Spaceless(s))
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Spaceless 移除多余的空格
|
||||
func Spaceless(html string) string {
|
||||
html = stringx.RemoveExtraSpace(html)
|
||||
if html == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return rxSpaceless.ReplaceAllString(html, "><")
|
||||
}
|
||||
|
||||
func Clean(html string, cleanMode CleanMode) string {
|
||||
if html == "" {
|
||||
return html
|
||||
}
|
||||
const n = 5
|
||||
modes := [n]bool{} // css, javascript, comment, meta, space, all
|
||||
for i := 0; i < n; i++ {
|
||||
if cleanMode&(1<<uint(10-i)) != 0 {
|
||||
modes[i] = true
|
||||
}
|
||||
}
|
||||
if modes[n-1] {
|
||||
html = Spaceless(rxStrip.ReplaceAllString(html, ""))
|
||||
} else {
|
||||
for i := 0; i < n-2; i++ {
|
||||
if modes[i] {
|
||||
switch i {
|
||||
case 0:
|
||||
html = rxCleanCSS.ReplaceAllString(html, "")
|
||||
case 1:
|
||||
html = rxCleanJavascript.ReplaceAllString(html, "")
|
||||
case 2:
|
||||
html = rxCleanComment.ReplaceAllString(html, "")
|
||||
case 3:
|
||||
html = rxCleanMeta.ReplaceAllString(html, "")
|
||||
case 4:
|
||||
html = Spaceless(html)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return html
|
||||
}
|
||||
|
||||
// Tag renders an HTML element named tag that wraps content.
//
// attributes are written as name="value" pairs and styles are combined into
// a single style="name:value;..." attribute, which is omitted when styles is
// empty. Both maps are emitted in ascending key order so the output is
// deterministic. Either map may be nil.
//
// A double quote inside an attribute or style value is written as &quot; so
// that it cannot terminate the surrounding attribute. tag, content and the
// map keys are written verbatim.
func Tag(tag, content string, attributes, styles map[string]string) string {
	// sortedKeys returns the keys of m in ascending order; map iteration
	// order is random, so sorting keeps the generated markup stable.
	sortedKeys := func(m map[string]string) []string {
		if len(m) == 0 {
			return nil
		}
		keys := make([]string, 0, len(m))
		for k := range m {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		return keys
	}
	// quoteSafe makes v safe to place inside a double-quoted attribute.
	quoteSafe := func(v string) string {
		return strings.ReplaceAll(v, `"`, "&quot;")
	}

	var sb strings.Builder
	// Lower bound on the output size: "<tag>" + content + "</tag>".
	sb.Grow(len(tag)*2 + len(content) + 5)
	sb.WriteString("<")
	sb.WriteString(tag)

	for _, k := range sortedKeys(attributes) {
		sb.WriteString(" ")
		sb.WriteString(k)
		sb.WriteString(`="`)
		sb.WriteString(quoteSafe(attributes[k]))
		sb.WriteString(`"`)
	}

	if keys := sortedKeys(styles); len(keys) > 0 {
		sb.WriteString(` style="`)
		for _, k := range keys {
			sb.WriteString(k)
			sb.WriteString(":")
			sb.WriteString(quoteSafe(styles[k]))
			sb.WriteString(`;`)
		}
		sb.WriteString(`"`)
	}

	sb.WriteString(">")
	sb.WriteString(content)
	sb.WriteString("</")
	sb.WriteString(tag)
	sb.WriteString(">")
	return sb.String()
}
|
||||
200
htmlx/html_test.go
Normal file
200
htmlx/html_test.go
Normal file
@@ -0,0 +1,200 @@
|
||||
package htmlx
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStrip(t *testing.T) {
|
||||
tests := []struct {
|
||||
tag string
|
||||
html string
|
||||
expected string
|
||||
}{
|
||||
{"t0", "<div>hello</div>", "hello"},
|
||||
{"t1", `
|
||||
|
||||
<div>hello</div>
|
||||
|
||||
`, "hello"},
|
||||
{"t3", "<div style='font-size: 12px;'>hello</div>", "hello"},
|
||||
{"t4", "<style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>", "hello"},
|
||||
{"t4", `
|
||||
<link rel='stylesheet' id='wp-block-library-css' href='https://www.example.com/style.min.css?ver=5.9.1' type='text/css' media='all' />
|
||||
<style type="text/css">body {font-size: 12px}</style><!-- / See later. --><div style='font-size: 12px;'>hello</div>`, "hello"},
|
||||
{"t5", `
|
||||
<body class="nodata company_blog" style="">
|
||||
<script> var toolbarSearchExt = '{"landingWord":[],"queryWord":"","tag":["function","class","filter","search"],"title":"Yii: 设置数据翻页"}';
|
||||
</script>
|
||||
<script src="https://g.csdnimg.cn/common/csdn-toolbar/csdn-toolbar.js" type="text/javascript"></script>
|
||||
<script src="https://g.csdnimg.cn/common/csdn-toolbar/csdn-toolbar1.js" type="text/javascript"></script>
|
||||
<script>
|
||||
(function(){
|
||||
var bp = document.createElement('script');
|
||||
var curProtocol = window.location.protocol.split(':')[0];
|
||||
if (curProtocol === 'https') {
|
||||
bp.src = 'https://zz.bdstatic.com/linksubmit/push.js';
|
||||
}
|
||||
else {
|
||||
bp.src = 'http://push.zhanzhang.baidu.com/push.js';
|
||||
}
|
||||
var s = document.getElementsByTagName("script")[0];
|
||||
s.parentNode.insertBefore(bp, s);
|
||||
})();
|
||||
</script>
|
||||
<link rel="stylesheet" href="https://csdnimg.cn/release/blogv2/dist/pc/css/blog_code-01256533b5.min.css">
|
||||
<link rel="stylesheet" href="https://csdnimg.cn/release/blogv2/dist/mdeditor/css/editerView/chart-3456820cac.css" /><div style='font-size: 12px;'>hello</div></body>`, "hello"},
|
||||
{"t6", `<!-- show up to 2 reviews by default -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p>Custom flags for your garden are a great way to show your personality to your friends and neighbors. Design and turn it into an eye-catching flag all year round. This will be a beautiful addition to your yard and garden, also a simple sign to show your patriotism on Memorial Day, 4th of July or Veterans Day, Christmas holidays or any holiday of the year.
|
||||
|
||||
</p>`, "Custom flags for your garden are a great way to show your personality to your friends and neighbors. Design and turn it into an eye-catching flag all year round. This will be a beautiful addition to your yard and garden, also a simple sign to show your patriotism on Memorial Day, 4th of July or Veterans Day, Christmas holidays or any holiday of the year."},
|
||||
{"t7", "<div>hello<div>", "hello"},
|
||||
{"t8", " <div> hello world <div>", "hello world"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
equal := Strip(test.html)
|
||||
assert.Equal(t, test.expected, equal, test.tag)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkStrip(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Strip(`<!-- show up to 2 reviews by default -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p>Custom flags for your garden are a great way to show your personality to your friends and neighbors. Design and turn it into an eye-catching flag all year round. This will be a beautiful addition to your yard and garden, also a simple sign to show your patriotism on Memorial Day, 4th of July or Veterans Day, Christmas holidays or any holiday of the year.
|
||||
|
||||
</p>`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpaceless(t *testing.T) {
|
||||
tests := []struct {
|
||||
tag string
|
||||
html string
|
||||
expected string
|
||||
}{
|
||||
{"t0", "<div>hello</div>", "<div>hello</div>"},
|
||||
{"t1", `
|
||||
|
||||
<div>hello</div>
|
||||
|
||||
`, "<div>hello</div>"},
|
||||
{"t3", "<div style='font-size: 12px;'>hello</div>", "<div style='font-size: 12px;'>hello</div>"},
|
||||
{"t4", "<style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>", "<style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>"},
|
||||
{"t4", `
|
||||
<link rel='stylesheet' id='wp-block-library-css' href='https://www.example.com/style.min.css?ver=5.9.1' type='text/css' media='all' />
|
||||
<style type="text/css">body {font-size: 12px}</style><!-- / See later. --><div style='font-size: 12px;'>hello</div>`, `<link rel='stylesheet' id='wp-block-library-css' href='https://www.example.com/style.min.css?ver=5.9.1' type='text/css' media='all' />
|
||||
<style type="text/css">body {font-size: 12px}</style><!-- / See later. --><div style='font-size: 12px;'>hello</div>`},
|
||||
{"t7", "<div> hello </div> <span></span>", "<div> hello </div> <span></span>"},
|
||||
{"t8", `<!-- show up to 2 reviews by default -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p>Custom flags for your garden are a great way to show your personality to your friends and neighbors. Design and turn it into an eye-catching flag all year round. This will be a beautiful addition to your yard and garden, also a simple sign to show your patriotism on Memorial Day, 4th of July or Veterans Day, Christmas holidays or any holiday of the year.
|
||||
|
||||
</p>`, `<!-- show up to 2 reviews by default --> <p>Custom flags for your garden are a great way to show your personality to your friends and neighbors. Design and turn it into an eye-catching flag all year round. This will be a beautiful addition to your yard and garden, also a simple sign to show your patriotism on Memorial Day, 4th of July or Veterans Day, Christmas holidays or any holiday of the year. </p>`},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
html := Spaceless(test.html)
|
||||
assert.Equal(t, test.expected, html, test.tag)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClean(t *testing.T) {
|
||||
tests := []struct {
|
||||
tag string
|
||||
html string
|
||||
cleanMode CleanMode
|
||||
expected string
|
||||
}{
|
||||
{"tcss1", "<div>hello</div>", CleanModeCSS, "<div>hello</div>"},
|
||||
{"tcss2", "<style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>", CleanModeCSS, "<div>hello</div>"},
|
||||
{"tjavascript1", `<script src="//www.a.com/1.8.5/blog.js" type='text/javascript'></script><style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>`, CleanModeJavascript, "<style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>"},
|
||||
{"tcomment1", `<script src="//www.a.com/1.8.5/blog.js" type='text/javascript'></script><!--comment--><style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>`, CleanModeComment, "<script src=\"//www.a.com/1.8.5/blog.js\" type='text/javascript'></script><style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>"},
|
||||
{"tcss,javascript,comment", `<script src="//www.a.com/1.8.5/blog.js" type='text/javascript'></script><!--comment--><style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>`, CleanModeCSS | CleanModeJavascript | CleanModeComment, "<div>hello</div>"},
|
||||
{"tall1", `<script>alert("ddd")</script><style>body {font-size: 12px}</style><div style='font-size: 12px;'>hello</div>`, CleanModeAll, "<div>hello</div>"},
|
||||
{"tall2", `<!-- show up to 2 reviews by default -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p>Product details: +++ Material: 100% Ceramic +++ Size: 11oz or 15oz +++ Dye Sublimation graphics for exceptional prints. +++ Dishwasher and microwave safe. +++ Image is printed on both sides of mug. +++ Printed in the U.S.A. +++ Shipping info: Shipping time is approximately 5-7 business days.
|
||||
|
||||
</p>`, CleanModeAll, "<p>Product details: +++ Material: 100% Ceramic +++ Size: 11oz or 15oz +++ Dye Sublimation graphics for exceptional prints. +++ Dishwasher and microwave safe. +++ Image is printed on both sides of mug. +++ Printed in the U.S.A. +++ Shipping info: Shipping time is approximately 5-7 business days. </p>"},
|
||||
{"tall3", `<div> 1 2 </div> <div>2</div>`, CleanModeAll, `<div> 1 2 </div> <div>2</div>`},
|
||||
}
|
||||
|
||||
for _, testCase := range tests {
|
||||
html := Clean(testCase.html, testCase.cleanMode)
|
||||
assert.Equal(t, testCase.expected, html, testCase.tag)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTag(t *testing.T) {
|
||||
tests := []struct {
|
||||
tag string
|
||||
elementTag string
|
||||
content string
|
||||
attributes map[string]string
|
||||
styles map[string]string
|
||||
expected string
|
||||
}{
|
||||
{"t0", "div", "hello", nil, nil, "<div>hello</div>"},
|
||||
{"t1", "div", "hello", map[string]string{"id": "name"}, nil, `<div id="name">hello</div>`},
|
||||
{"t1.1", "div", "hello", map[string]string{"id": "name", "name": "name"}, nil, `<div id="name" name="name">hello</div>`},
|
||||
{"t2", "div", "hello", map[string]string{"id": "name", "data-tag": "123"}, map[string]string{"font-size": "1", "font-weight": "bold"}, `<div data-tag="123" id="name" style="font-size:1;font-weight:bold;">hello</div>`},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
equal := Tag(test.elementTag, test.content, test.attributes, test.styles)
|
||||
assert.Equal(t, test.expected, equal, test.tag)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTag(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Tag("div", "hello", map[string]string{"id": "name"}, map[string]string{"font-size": "1"})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user