Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/base/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,22 @@ impl<'b> BytesCow<'b> {

#[inline]
pub fn from_str_without_replacements(
string: &'b str,
string: impl Into<Cow<'b, str>>,
encoding: &'static Encoding,
) -> Result<Self, HasReplacementsError> {
let (res, _, has_replacements) = encoding.encode(string);
let string = string.into();
let (res, _, has_replacements) = encoding.encode(&string);

if has_replacements {
Err(HasReplacementsError)
} else {
Ok(res.into())
Ok(Self(match res {
Cow::Borrowed(_) => match string {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
},
Cow::Owned(bytes) => Cow::Owned(bytes),
}))
}
}

Expand Down
17 changes: 17 additions & 0 deletions src/base/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,20 @@ pub use self::encoding::SharedEncoding;
pub(crate) use self::range::Range;
pub use self::spanned::SourceLocation;
pub(crate) use self::spanned::{Spanned, SpannedRawBytes};

/// Compares two byte strings ASCII-case-insensitively, lowercasing only `mixed_case`.
///
/// Unlike `eq_ignore_ascii_case`, the second argument is compared as-is, so callers
/// must pass an already ASCII-lowercased `lowercased` (verified in debug builds only).
/// Returns `false` whenever the two slices differ in length.
pub(crate) fn eq_case_insensitive(mixed_case: &[u8], lowercased: &[u8]) -> bool {
    debug_assert!(lowercased.iter().all(|&b| b == b.to_ascii_lowercase()));

    // Check the lengths first: `zip` would otherwise silently stop at the shorter slice.
    mixed_case.len() == lowercased.len()
        && mixed_case
            .iter()
            .zip(lowercased)
            .all(|(actual, &expected)| actual.to_ascii_lowercase() == expected)
}
8 changes: 5 additions & 3 deletions src/html/local_name.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::Tag;
use crate::base::{Bytes, BytesCow, HasReplacementsError, Range};
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::fmt;

// NOTE: All standard tag names contain only ASCII alpha characters
Expand Down Expand Up @@ -161,11 +162,12 @@ impl<'i> LocalName<'i> {
}

#[inline]
pub fn from_str_without_replacements<'s>(
string: &'s str,
pub(crate) fn from_str_without_replacements<'s>(
string: impl Into<Cow<'s, str>>,
encoding: &'static Encoding,
) -> Result<LocalName<'s>, HasReplacementsError> {
let hash = LocalNameHash::from(string);
let string = string.into();
let hash = LocalNameHash::from(&*string);

if hash.is_empty() {
BytesCow::from_str_without_replacements(string, encoding).map(LocalName::Bytes)
Expand Down
17 changes: 1 addition & 16 deletions src/parser/tree_builder_simulator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
mod ambiguity_guard;

use self::ambiguity_guard::AmbiguityGuard;
use crate::base::eq_case_insensitive;
use crate::html::{LocalNameHash, Namespace, Tag, TextType};
use crate::parser::{TagLexeme, TagTokenOutline};
use TagTokenOutline::{EndTag, StartTag};
Expand Down Expand Up @@ -59,22 +60,6 @@ macro_rules! expect_tag {
};
}

/// ASCII-case-insensitive byte comparison that lowercases only `actual`.
///
/// Unlike `eq_ignore_ascii_case`, `expected` is compared byte-for-byte, so an
/// uppercase byte in `expected` never matches. Slices of different lengths are
/// never equal.
#[inline]
fn eq_case_insensitive(actual: &[u8], expected: &[u8]) -> bool {
    // Check the lengths first: `zip` alone would ignore the tail of the longer slice.
    actual.len() == expected.len()
        && actual
            .iter()
            .zip(expected)
            .all(|(a, &e)| a.to_ascii_lowercase() == e)
}

#[inline]
fn get_text_type_adjustment(tag_name: LocalNameHash) -> TreeBuilderFeedback {
use TextType::*;
Expand Down
16 changes: 4 additions & 12 deletions src/rewritable_units/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token,
}

/// Returns an immutable collection of element's attributes.
///
/// `get_attribute` is faster if you only need to read a few attributes.
#[inline]
#[must_use]
pub fn attributes(&self) -> &[Attribute<'input_token>] {
Expand All @@ -197,24 +199,14 @@ impl<'rewriter, 'input_token, H: HandlerTypes> Element<'rewriter, 'input_token,
#[inline]
#[must_use]
pub fn get_attribute(&self, name: &str) -> Option<String> {
let name = name.to_ascii_lowercase();

self.attributes().iter().find_map(|attr| {
if attr.name() == name {
Some(attr.value())
} else {
None
}
})
self.start_tag.get_attribute(name)
}

/// Returns `true` if the element has an attribute with `name`.
#[inline]
#[must_use]
pub fn has_attribute(&self, name: &str) -> bool {
let name = name.to_ascii_lowercase();

self.attributes().iter().any(|attr| attr.name() == name)
self.start_tag.has_attribute(name)
}

/// Sets `value` of element's attribute with `name`. The value may have HTML/XML entities.
Expand Down
127 changes: 71 additions & 56 deletions src/rewritable_units/tokens/attributes.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::base::{Bytes, BytesCow};
use crate::base::{Bytes, BytesCow, eq_case_insensitive};
use crate::errors::RewritingError;
use crate::html::escape_double_quotes_only;
use crate::parser::AttributeBuffer;
Expand Down Expand Up @@ -60,9 +60,8 @@ impl<'i> Attribute<'i> {
}
}

#[inline]
fn name_from_str(
name: &str,
fn name_from_string(
name: String,
encoding: &'static Encoding,
) -> Result<BytesCow<'static>, AttributeNameError> {
if name.is_empty() {
Expand All @@ -85,20 +84,6 @@ impl<'i> Attribute<'i> {
}
}

/// Builds an attribute from a `name`/`value` pair using `encoding`.
///
/// The name is converted with `Attribute::name_from_str`; its error is propagated
/// to the caller as `AttributeNameError`. The value is encoded with
/// `BytesCow::from_str` and turned into an owned buffer.
#[inline]
fn try_from(
name: &str,
value: &str,
encoding: &'static Encoding,
) -> Result<Self, AttributeNameError> {
Ok(Attribute {
name: Attribute::name_from_str(name, encoding)?,
value: BytesCow::from_str(value, encoding).into_owned(),
// NOTE(review): presumably `None` because this attribute has no original source text — confirm.
raw: None,
encoding,
})
}

/// Returns the name of the attribute, always ASCII lowercased.
#[inline]
#[must_use]
Expand Down Expand Up @@ -175,6 +160,35 @@ impl<'i> Attributes<'i> {
}
}

/// Applies `map` to the first attribute whose name matches `name` and returns its result.
///
/// `name` is ASCII-lowercased and converted with `Attribute::name_from_string` using
/// this collection's encoding; if that conversion fails, `None` is returned.
/// Attribute names are compared with `eq_case_insensitive`.
/// Returns `None` when no attribute matches.
pub(crate) fn map_attribute<R>(
    &self,
    name: &str,
    map: impl Fn(&Attribute<'_>) -> R,
) -> Option<R> {
    let wanted = Attribute::name_from_string(name.to_ascii_lowercase(), self.encoding).ok()?;
    let matcher = move |candidate: &Attribute<'_>| {
        eq_case_insensitive(&candidate.name.as_ref(), &wanted.as_ref()).then(|| map(candidate))
    };

    if let Some(items) = self.items.get() {
        // Use the existing item list when `items` already holds one.
        items.iter().find_map(matcher)
    } else {
        // Otherwise check the attributes yielded by `iter_attrs` one at a time.
        self.iter_attrs().find_map(|attr| matcher(&attr))
    }
}

/// Returns the value of the first attribute matching `name`, or `None` if nothing matches.
///
/// Thin wrapper over `map_attribute`, which performs the name conversion and comparison.
#[inline(never)]
pub(crate) fn get_attribute(&self, name: &str) -> Option<String> {
self.map_attribute(name, |attr| attr.value())
}

/// Returns `true` if an attribute matching `name` exists.
///
/// The lookup is delegated to `map_attribute`; any `None` it returns (no match, or
/// a name it could not convert) is reported as `false`.
#[inline(never)]
pub(crate) fn has_attribute(&self, name: &str) -> bool {
    self.map_attribute(name, |_| ()).is_some()
}

/// Adds or replaces the attribute. The value may have HTML/XML entities.
///
/// Quotes will be escaped if needed. Other entities won't be changed.
Expand All @@ -184,34 +198,34 @@ impl<'i> Attributes<'i> {
value: &str,
encoding: &'static Encoding,
) -> Result<(), AttributeNameError> {
let name = name.to_ascii_lowercase();
let name = Attribute::name_from_string(name.to_ascii_lowercase(), encoding)?;
let items = self.as_mut_vec();

match items.iter_mut().find(|attr| attr.name() == name.as_str()) {
match items
.iter_mut()
.find(|attr| eq_case_insensitive(&attr.name.as_ref(), &name.as_ref()))
{
Some(attr) => attr.set_value(value),
None => {
items.push(Attribute::try_from(&name, value, encoding)?);
items.push(Attribute {
name,
value: BytesCow::from_str(value, encoding).into_owned(),
raw: None,
encoding,
});
}
}

Ok(())
}

pub fn remove_attribute(&mut self, name: &str) -> bool {
let name = name.to_ascii_lowercase();
let Ok(name) = Attribute::name_from_string(name.to_ascii_lowercase(), self.encoding) else {
return false;
};
let items = self.as_mut_vec();
let mut i = 0;

while i < items.len() {
if items[i].name() == name.as_str() {
items.remove(i);
return true;
}

i += 1;
}

false
let len_before = items.len();
items.retain(|attr| !eq_case_insensitive(&attr.name.as_ref(), &name.as_ref()));
len_before != items.len()
}

pub fn is_empty(&self) -> bool {
Expand All @@ -222,31 +236,32 @@ impl<'i> Attributes<'i> {
.unwrap_or(self.attribute_buffer.is_empty())
}

#[inline(never)]
fn init_items(&self) -> Vec<Attribute<'i>> {
fn iter_attrs(&self) -> impl Iterator<Item = Attribute<'i>> {
let cant_fail = || {
debug_assert!(false);
Bytes::default()
};
self.attribute_buffer
.iter()
.map(|a| {
Attribute::new(
self.input
.opt_slice(Some(a.name))
.unwrap_or_else(cant_fail)
.into(),
self.input
.opt_slice(Some(a.value))
.unwrap_or_else(cant_fail)
.into(),
self.input
.opt_slice(Some(a.raw_range))
.unwrap_or_else(cant_fail),
self.encoding,
)
})
.collect()
self.attribute_buffer.iter().map(move |a| {
Attribute::new(
self.input
.opt_slice(Some(a.name))
.unwrap_or_else(cant_fail)
.into(),
self.input
.opt_slice(Some(a.value))
.unwrap_or_else(cant_fail)
.into(),
self.input
.opt_slice(Some(a.raw_range))
.unwrap_or_else(cant_fail),
self.encoding,
)
})
}

/// Collects every attribute yielded by `iter_attrs` into a `Vec`.
// NOTE(review): `#[inline(never)]` presumably keeps this slower path out of callers — confirm.
#[inline(never)]
fn init_items(&self) -> Vec<Attribute<'i>> {
self.iter_attrs().collect()
}

pub(crate) fn to_slice(&self) -> &[Attribute<'i>] {
Expand Down
18 changes: 18 additions & 0 deletions src/rewritable_units/tokens/start_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,25 @@ impl<'input_token> StartTag<'input_token> {
self.ns.uri()
}

/// Returns the value of an attribute with the `name`. The value may have HTML/XML entities.
///
/// Returns `None` if the element doesn't have an attribute with the `name`.
///
/// The lookup is forwarded to the tag's attribute collection.
#[inline]
#[must_use]
pub fn get_attribute(&self, name: &str) -> Option<String> {
self.attributes.get_attribute(name)
}

/// Returns `true` if the element has an attribute with `name`.
///
/// The lookup is forwarded to the tag's attribute collection.
#[inline]
#[must_use]
pub fn has_attribute(&self, name: &str) -> bool {
self.attributes.has_attribute(name)
}

/// Returns an immutable collection of tag's attributes.
///
/// `get_attribute` is faster if you only need to read a few attributes.
#[inline]
pub fn attributes(&self) -> &[Attribute<'input_token>] {
self.attributes.to_slice()
Expand Down
15 changes: 9 additions & 6 deletions src/rewriter/rewrite_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use super::{HandlerTypes, RewritingError, Settings};
use crate::base::SharedEncoding;
use crate::html::{LocalName, Namespace};
use crate::memory::SharedMemoryLimiter;
use crate::parser::ActionError;
use crate::rewritable_units::{DocumentEnd, Token, TokenCaptureFlags};
use crate::selectors_vm::Ast;
use crate::selectors_vm::{AuxStartTagInfoRequest, ElementData, SelectorMatchingVm, VmError};
Expand Down Expand Up @@ -107,12 +108,14 @@ impl<H: HandlerTypes> HtmlRewriteController<'_, H> {
) -> StartTagHandlingResult<Self> {
Err(DispatcherError::InfoRequest(Box::new(
move |this, aux_info| {
if let Some(ref mut vm) = this.selector_matching_vm {
let mut match_handler = |m| this.handlers_dispatcher.start_matching(&m);

aux_info_req(vm, aux_info, &mut match_handler)
.map_err(RewritingError::MemoryLimitExceeded)?;
}
let Some(vm) = &mut this.selector_matching_vm else {
debug_assert!(false);
return Err(ActionError::internal("vm req without vm"));
};
let mut match_handler = |m| this.handlers_dispatcher.start_matching(&m);

aux_info_req(vm, aux_info, &mut match_handler)
.map_err(RewritingError::MemoryLimitExceeded)?;

Ok(this.get_capture_flags())
},
Expand Down
5 changes: 2 additions & 3 deletions src/selectors_vm/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ impl Compilable for Expr<OnTagNameExpr> {
OnTagNameExpr::ExplicitAny => self.compile_expr(|_, _| true),
OnTagNameExpr::Unmatchable => self.compile_expr(|_, _| false),
OnTagNameExpr::LocalName(local_name) => {
match LocalName::from_str_without_replacements(local_name, encoding)
match LocalName::from_str_without_replacements(&**local_name, encoding)
.map(LocalName::into_owned)
{
Ok(local_name) => self.compile_expr(move |_, actual| *actual == local_name),
Expand Down Expand Up @@ -400,10 +400,9 @@ mod tests {
test_with_token(html, encoding, |t| match t {
Token::StartTag(t) => {
let (input, attrs) = t.raw_attributes();
let tag_name = t.name();
let attr_matcher = AttributeMatcher::new(*input, attrs, Namespace::Html);
let local_name =
LocalName::from_str_without_replacements(&tag_name, encoding).unwrap();
LocalName::from_str_without_replacements(t.name(), encoding).unwrap();

action(local_name, attr_matcher);
}
Expand Down
Loading