1//! A Latin1 or UTF-16 encoded, reference counted, immutable string.23// Required per unsafe code standards to ensure every unsafe usage is properly documented.4// - `unsafe_op_in_unsafe_fn` will be warn-by-default in edition 2024:5// https://github.com/rust-lang/rust/issues/71668#issuecomment-11893968606// - `undocumented_unsafe_blocks` and `missing_safety_doc` requires a `Safety:` section in the7// comment or doc of the unsafe block or function, respectively.8#![deny(9 unsafe_op_in_unsafe_fn,10 clippy::undocumented_unsafe_blocks,11 clippy::missing_safety_doc12)]13#![allow(clippy::module_name_repetitions)]1415modbuilder;16modcommon;17moddisplay;18moditer;19modstr;2021#[cfg(test)]22modtests;2324useself::{iter::Windows, str::JsSliceIndex};25usecrate::display::{JsStrDisplayEscaped, JsStrDisplayLossy};26#[doc(inline)]27pub use crate::{28 builder::{CommonJsStringBuilder, Latin1JsStringBuilder, Utf16JsStringBuilder},29 common::StaticJsStrings,30 iter::Iter,31 str::{JsStr, JsStrVariant},32};33usestd::fmt::Write;34usestd::{35 alloc::{Layout, alloc, dealloc},36 cell::Cell,37 convert::Infallible,38 hash::{Hash, Hasher},39 process::abort,40 ptr::{self, NonNull},41 str::FromStr,42};43usestd::{borrow::Cow, mem::ManuallyDrop};4445fnalloc_overflow() -> ! {46panic!("detected overflow during string allocation")47}4849/// Helper function to check if a `char` is trimmable.50pub(crate)const fnis_trimmable_whitespace(c: char) -> bool {51// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does52 //53 // Rust uses \p{White_Space} by default, which also includes:54 // `\u{0085}' (next line)55 // And does not include:56 // '\u{FEFF}' (zero width non-breaking space)57 // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space58matches!(59 c,60'\u{0009}'|'\u{000B}'|'\u{000C}'|'\u{0020}'|'\u{00A0}'|'\u{FEFF}'|61// Unicode Space_Separator category62'\u{1680}'|'\u{2000}'63..='\u{200A}'|'\u{202F}'|'\u{205F}'|'\u{3000}'|64// Line terminators: https://tc39.es/ecma262/#sec-line-terminators65'\u{000A}'|'\u{000D}'|'\u{2028}'|'\u{2029}'66)67}6869/// Helper function to check if a `u8` latin1 character is trimmable.70pub(crate)const fnis_trimmable_whitespace_latin1(c: u8) -> bool {71// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does72 //73 // Rust uses \p{White_Space} by default, which also includes:74 // `\u{0085}' (next line)75 // And does not include:76 // '\u{FEFF}' (zero width non-breaking space)77 // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space78matches!(79 c,800x09|0x0B|0x0C|0x20|0xA0|81// Line terminators: https://tc39.es/ecma262/#sec-line-terminators820x0A|0x0D83)84}8586/// Represents a Unicode codepoint within a [`JsString`], which could be a valid87/// '[Unicode scalar value]', or an unpaired surrogate.88///89/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value90#[derive(Clone, Copy, Debug, Eq, PartialEq)]91pub enumCodePoint {92/// A valid Unicode scalar value.93Unicode(char),9495/// An unpaired surrogate.96UnpairedSurrogate(u16),97}9899implCodePoint {100/// Get the number of UTF-16 code units needed to encode this code point.101#[inline]102 #[must_use]103pub const fncode_unit_count(self) -> usize {104matchself{105Self::Unicode(c) => c.len_utf16(),106Self::UnpairedSurrogate(_) =>1,107 }108 }109110/// Convert the code point to its [`u32`] representation.111#[inline]112 #[must_use]113pub fnas_u32(self) -> u32 {114matchself{115Self::Unicode(c) => u32::from(c),116Self::UnpairedSurrogate(surr) => u32::from(surr),117 }118 }119120/// If the code point represents a valid 'Unicode scalar value', returns its [`char`]121 /// representation, otherwise returns [`None`] on unpaired surrogates.122#[inline]123 #[must_use]124pub const fnas_char(self) ->Option<char> {125matchself{126Self::Unicode(c) =>Some(c),127Self::UnpairedSurrogate(_) =>None,128 }129 }130131/// Encodes this code point as UTF-16 into the provided u16 buffer, and then returns the subslice132 /// of the buffer that contains the encoded character.133 ///134 /// # Panics135 ///136 /// Panics if the buffer is not large enough. A buffer of length 2 is large enough to encode any137 /// code point.138#[inline]139 #[must_use]140pub fnencode_utf16(self, dst:&mut[u16]) ->&mut[u16] {141matchself{142Self::Unicode(c) => c.encode_utf16(dst),143Self::UnpairedSurrogate(surr) => {144 dst[0] = surr;145&mutdst[0..=0]146 }147 }148 }149}150151implstd::fmt::DisplayforCodePoint {152#[inline]153fnfmt(&self, f:&mutstd::fmt::Formatter<'_>) -> std::fmt::Result {154matchself{155 CodePoint::Unicode(c) => f.write_char(*c),156 CodePoint::UnpairedSurrogate(c) => {157write!(f,"\\u{c:04X}")158 }159 }160 }161}162163/// A `usize` contains a flag and the length of Latin1/UTF-16 .164/// ```text165/// ┌────────────────────────────────────┐166/// │ length (usize::BITS - 1) │ flag(1) │167/// └────────────────────────────────────┘168/// ```169/// The latin1/UTF-16 flag is stored in the bottom bit.170#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]171#[repr(transparent)]172structTaggedLen(usize);173174implTaggedLen {175constLATIN1_BITFLAG: usize =1<<0;176constBITFLAG_COUNT: usize =1;177178const fnnew(len: usize, latin1: bool) ->Self{179Self((len <<Self::BITFLAG_COUNT) | (latin1asusize))180 }181182const fnis_latin1(self) -> bool {183 (self.0&Self::LATIN1_BITFLAG) !=0184}185186const fnlen(self) -> usize {187self.0>>Self::BITFLAG_COUNT188 }189}190191/// The raw representation of a [`JsString`] in the heap.192#[repr(C)]193#[allow(missing_debug_implementations)]194pub structRawJsString {195 tagged_len: TaggedLen,196 refcount: Cell<usize>,197 data: [u8;0],198}199200implRawJsString {201const fnis_latin1(&self) -> bool {202self.tagged_len.is_latin1()203 }204205const fnlen(&self) -> usize {206self.tagged_len.len()207 }208}209210constDATA_OFFSET: usize = size_of::<RawJsString>();211212enumUnwrapped<'a> {213 Heap(NonNull<RawJsString>),214 Static(&'aJsStr<'static>),215}216217/// A Latin1 or UTF-16–encoded, reference counted, immutable string.218///219/// This is pretty similar to a <code>[Rc][std::rc::Rc]\<[\[u16\]][slice]\></code>, but without the220/// length metadata associated with the `Rc` fat pointer. Instead, the length of every string is221/// stored on the heap, along with its reference counter and its data.222///223/// The string can be latin1 (stored as a byte for space efficiency) or U16 encoding.224///225/// We define some commonly used string constants in an interner. For these strings, we don't allocate226/// memory on the heap to reduce the overhead of memory allocation and reference counting.227#[allow(clippy::module_name_repetitions)]228pub structJsString {229 ptr: NonNull<RawJsString>,230}231232// JsString should always be pointer sized.233static_assertions::assert_eq_size!(JsString,*const());234235impl<'a> From<&'aJsString>forJsStr<'a> {236#[inline]237fnfrom(value:&'aJsString) ->Self{238 value.as_str()239 }240}241242impl<'a> IntoIteratorfor&'aJsString {243typeIntoIter = Iter<'a>;244typeItem = u16;245246#[inline]247fninto_iter(self) ->Self::IntoIter {248self.iter()249 }250}251252implJsString {253/// Create an iterator over the [`JsString`].254#[inline]255 #[must_use]256pub fniter(&self) -> Iter<'_> {257self.as_str().iter()258 }259260/// Create an iterator over overlapping subslices of length size.261#[inline]262 #[must_use]263pub fnwindows(&self, size: usize) -> Windows<'_> {264self.as_str().windows(size)265 }266267/// Decodes a [`JsString`] into a [`String`], replacing invalid data with its escaped representation268 /// in 4 digit hexadecimal.269#[inline]270 #[must_use]271pub fnto_std_string_escaped(&self) -> String {272self.display_escaped().to_string()273 }274275/// Decodes a [`JsString`] into a [`String`], replacing invalid data with the276 /// replacement character U+FFFD.277#[inline]278 #[must_use]279pub fnto_std_string_lossy(&self) -> String {280self.display_lossy().to_string()281 }282283/// Decodes a [`JsString`] into a [`String`], returning an error if the string contains unpaired284 /// surrogates.285 ///286 /// # Errors287 ///288 /// [`FromUtf16Error`][std::string::FromUtf16Error] if it contains any invalid data.289#[inline]290pub fnto_std_string(&self) ->Result<String, std::string::FromUtf16Error> {291self.as_str().to_std_string()292 }293294/// Decodes a [`JsString`] into an iterator of [`Result<String, u16>`], returning surrogates as295 /// errors.296#[inline]297pub fnto_std_string_with_surrogates(&self) ->implIterator<Item =Result<String, u16>> +'_{298self.as_str().to_std_string_with_surrogates()299 }300301/// Maps the valid segments of an UTF16 string and leaves the unpaired surrogates unchanged.302#[inline]303 #[must_use]304pub fnmap_valid_segments<F>(&self,mutf: F) ->Self305where306F: FnMut(String) -> String,307 {308letmuttext = Vec::new();309310forpartinself.to_std_string_with_surrogates() {311matchpart {312Ok(string) => text.extend(f(string).encode_utf16()),313Err(surr) => text.push(surr),314 }315 }316317Self::from(&text[..])318 }319320/// Gets an iterator of all the Unicode codepoints of a [`JsString`].321#[inline]322pub fncode_points(&self) ->implIterator<Item = CodePoint> + Clone +'_{323self.as_str().code_points()324 }325326/// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )`327 ///328 /// Note: Instead of returning an isize with `-1` as the "not found" value, we make use of the329 /// type system and return <code>[Option]\<usize\></code> with [`None`] as the "not found" value.330 ///331 /// More information:332 /// - [ECMAScript reference][spec]333 ///334 /// [spec]: https://tc39.es/ecma262/#sec-stringindexof335#[inline]336 #[must_use]337pub fnindex_of(&self, search_value: JsStr<'_>, from_index: usize) ->Option<usize> {338self.as_str().index_of(search_value, from_index)339 }340341/// Abstract operation `CodePointAt( string, position )`.342 ///343 /// The abstract operation `CodePointAt` takes arguments `string` (a String) and `position` (a344 /// non-negative integer) and returns a Record with fields `[[CodePoint]]` (a code point),345 /// `[[CodeUnitCount]]` (a positive integer), and `[[IsUnpairedSurrogate]]` (a Boolean). It346 /// interprets string as a sequence of UTF-16 encoded code points, as described in 6.1.4, and reads347 /// from it a single code point starting with the code unit at index `position`.348 ///349 /// More information:350 /// - [ECMAScript reference][spec]351 ///352 /// [spec]: https://tc39.es/ecma262/#sec-codepointat353 ///354 /// # Panics355 ///356 /// If `position` is smaller than size of string.357#[inline]358 #[must_use]359pub fncode_point_at(&self, position: usize) -> CodePoint {360self.as_str().code_point_at(position)361 }362363/// Abstract operation `StringToNumber ( str )`364 ///365 /// More information:366 /// - [ECMAScript reference][spec]367 ///368 /// [spec]: https://tc39.es/ecma262/#sec-stringtonumber369#[inline]370 #[must_use]371pub fnto_number(&self) -> f64 {372self.as_str().to_number()373 }374375/// Get the length of the [`JsString`].376#[inline]377 #[must_use]378pub fnlen(&self) -> usize {379self.as_str().len()380 }381382/// Return true if the [`JsString`] is emtpy.383#[inline]384 #[must_use]385pub fnis_empty(&self) -> bool {386self.len() ==0387}388389/// Convert the [`JsString`] into a [`Vec<U16>`].390#[inline]391 #[must_use]392pub fnto_vec(&self) -> Vec<u16> {393self.as_str().to_vec()394 }395396/// Check if the [`JsString`] contains a byte.397#[inline]398 #[must_use]399pub fncontains(&self, element: u8) -> bool {400self.as_str().contains(element)401 }402403/// Trim whitespace from the start and end of the [`JsString`].404#[inline]405 #[must_use]406pub fntrim(&self) -> JsStr<'_> {407self.as_str().trim()408 }409410/// Trim whitespace from the start of the [`JsString`].411#[inline]412 #[must_use]413pub fntrim_start(&self) -> JsStr<'_> {414self.as_str().trim_start()415 }416417/// Trim whitespace from the end of the [`JsString`].418#[inline]419 #[must_use]420pub fntrim_end(&self) -> JsStr<'_> {421self.as_str().trim_end()422 }423424/// Get the element a the given index, [`None`] otherwise.425#[inline]426 #[must_use]427pub fnget<'a, I>(&'aself, index: I) ->Option<I::Value>428where429I: JsSliceIndex<'a>,430 {431self.as_str().get(index)432 }433434/// Returns an element or subslice depending on the type of index, without doing bounds check.435 ///436 /// # Safety437 ///438 /// Caller must ensure the index is not out of bounds439#[inline]440 #[must_use]441pub unsafe fnget_unchecked<'a, I>(&'aself, index: I) -> I::Value442where443I: JsSliceIndex<'a>,444 {445// SAFETY: Caller must ensure the index is not out of bounds446unsafe{self.as_str().get_unchecked(index) }447 }448449/// Get the element a the given index.450 ///451 /// # Panics452 ///453 /// If the index is out of bounds.454#[inline]455 #[must_use]456pub fnget_expect<'a, I>(&'aself, index: I) -> I::Value457where458I: JsSliceIndex<'a>,459 {460self.as_str().get_expect(index)461 }462463/// Gets a displayable escaped string. This may be faster and has fewer464 /// allocations than `format!("{}", str.to_string_escaped())` when465 /// displaying.466#[inline]467 #[must_use]468pub fndisplay_escaped(&self) -> JsStrDisplayEscaped<'_> {469self.as_str().display_escaped()470 }471472/// Gets a displayable lossy string. This may be faster and has fewer473 /// allocations than `format!("{}", str.to_string_lossy())` when displaying.474#[inline]475 #[must_use]476pub fndisplay_lossy(&self) -> JsStrDisplayLossy<'_> {477self.as_str().display_lossy()478 }479480/// Consumes the [`JsString`], returning a pointer to `RawJsString`.481 ///482 /// To avoid a memory leak the pointer must be converted back to a `JsString` using483 /// [`JsString::from_raw`].484#[inline]485 #[must_use]486pub fninto_raw(self) -> NonNull<RawJsString> {487 ManuallyDrop::new(self).ptr488 }489490/// Constructs a `JsString` from a pointer to `RawJsString`.491 ///492 /// The raw pointer must have been previously returned by a call to493 /// [`JsString::into_raw`].494 ///495 /// # Safety496 ///497 /// This function is unsafe because improper use may lead to memory unsafety,498 /// even if the returned `JsString` is never accessed.499#[inline]500 #[must_use]501pub unsafe fnfrom_raw(ptr: NonNull<RawJsString>) ->Self{502Self{ ptr }503 }504}505506// `&JsStr<'static>` must always be aligned so it can be taggged.507static_assertions::const_assert!(align_of::<*constJsStr<'static>>() >=2);508509implJsString {510/// Create a [`JsString`] from a static js string.511#[must_use]512pub const fnfrom_static_js_str(src:&'staticJsStr<'static>) ->Self{513letsrc = ptr::from_ref(src);514515// SAFETY: A reference cannot be null, so this is safe.516 //517 // TODO: Replace once `NonNull::from_ref()` is stabilized.518letptr =unsafe{ NonNull::new_unchecked(src.cast_mut()) };519520// SAFETY:521 // - Adding one to an aligned pointer will tag the pointer's last bit.522 // - The pointer's provenance remains unchanged, so this is safe.523lettagged_ptr =unsafe{ ptr.byte_add(1) };524525 JsString {526 ptr: tagged_ptr.cast::<RawJsString>(),527 }528 }529530/// Check if the [`JsString`] is static.531#[inline]532 #[must_use]533pub fnis_static(&self) -> bool {534self.ptr.addr().get() &1!=0535}536537pub(crate)fnunwrap(&self) -> Unwrapped<'_> {538ifself.is_static() {539// SAFETY: Static pointer is tagged and already checked, so this is safe.540letptr =unsafe{self.ptr.byte_sub(1) };541542// SAFETY: A static pointer always points to a valid JsStr, so this is safe.543Unwrapped::Static(unsafe{ ptr.cast::<JsStr<'static>>().as_ref() })544 }else{545 Unwrapped::Heap(self.ptr)546 }547 }548549/// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`]550#[inline]551 #[must_use]552pub fnas_str(&self) -> JsStr<'_> {553letptr =matchself.unwrap() {554 Unwrapped::Heap(ptr) => ptr.as_ptr(),555 Unwrapped::Static(js_str) =>return*js_str,556 };557558// SAFETY:559 // - Unwrapped heap ptr is always a valid heap allocated RawJsString.560 // - Length of a heap allocated string always contains the correct size of the string.561unsafe{562lettagged_len = (*ptr).tagged_len;563letlen = tagged_len.len();564letis_latin1 = tagged_len.is_latin1();565letptr = (&rawconst(*ptr).data).cast::<u8>();566567ifis_latin1 {568 JsStr::latin1(std::slice::from_raw_parts(ptr, len))569 }else{570// SAFETY: Raw data string is always correctly aligned when allocated.571#[allow(clippy::cast_ptr_alignment)]572JsStr::utf16(std::slice::from_raw_parts(ptr.cast::<u16>(), len))573 }574 }575 }576577/// Creates a new [`JsString`] from the concatenation of `x` and `y`.578#[inline]579 #[must_use]580pub fnconcat(x: JsStr<'_>, y: JsStr<'_>) ->Self{581Self::concat_array(&[x, y])582 }583584/// Creates a new [`JsString`] from the concatenation of every element of585 /// `strings`.586#[inline]587 #[must_use]588pub fnconcat_array(strings:&[JsStr<'_>]) ->Self{589letmutlatin1_encoding =true;590letmutfull_count =0usize;591forstringinstrings {592letSome(sum) = full_count.checked_add(string.len())else{593 alloc_overflow()594 };595if!string.is_latin1() {596 latin1_encoding =false;597 }598 full_count = sum;599 }600601letptr =Self::allocate_inner(full_count, latin1_encoding);602603letstring = {604// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.605letmutdata =unsafe{ (&rawmut(*ptr.as_ptr()).data).cast::<u8>() };606for&stringinstrings {607// SAFETY:608 // The sum of all `count` for each `string` equals `full_count`, and since we're609 // iteratively writing each of them to `data`, `copy_non_overlapping` always stays610 // in-bounds for `count` reads of each string and `full_count` writes to `data`.611 //612 // Each `string` must be properly aligned to be a valid slice, and `data` must be613 // properly aligned by `allocate_inner`.614 //615 // `allocate_inner` must return a valid pointer to newly allocated memory, meaning616 // `ptr` and all `string`s should never overlap.617unsafe{618// NOTE: The aligment is checked when we allocate the array.619#[allow(clippy::cast_ptr_alignment)]620match(latin1_encoding, string.variant()) {621 (true, JsStrVariant::Latin1(s)) => {622letcount = s.len();623 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);624 data = data.cast::<u8>().add(count).cast::<u8>();625 }626 (false, JsStrVariant::Latin1(s)) => {627letcount = s.len();628for(i, byte)ins.iter().enumerate() {629*data.cast::<u16>().add(i) = u16::from(*byte);630 }631 data = data.cast::<u16>().add(count).cast::<u8>();632 }633 (false, JsStrVariant::Utf16(s)) => {634letcount = s.len();635 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);636 data = data.cast::<u16>().add(count).cast::<u8>();637 }638 (true, JsStrVariant::Utf16(_)) => {639unreachable!("Already checked that it's latin1 encoding")640 }641 }642 }643 }644Self{645// SAFETY: We already know it's a valid heap pointer.646ptr:unsafe{ NonNull::new_unchecked(ptr.as_ptr()) },647 }648 };649650 StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)651 }652653/// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.654 ///655 /// # Panics656 ///657 /// Panics if `try_allocate_inner` returns `Err`.658fnallocate_inner(str_len: usize, latin1: bool) -> NonNull<RawJsString> {659matchSelf::try_allocate_inner(str_len, latin1) {660Ok(v) => v,661Err(None) => alloc_overflow(),662Err(Some(layout)) => std::alloc::handle_alloc_error(layout),663 }664 }665666// This is marked as safe because it is always valid to call this function to request any number667 // of `u16`, since this function ought to fail on an OOM error.668/// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.669 ///670 /// # Errors671 ///672 /// Returns `Err(None)` on integer overflows `usize::MAX`.673 /// Returns `Err(Some(Layout))` on allocation error.674fntry_allocate_inner(675 str_len: usize,676 latin1: bool,677 ) ->Result<NonNull<RawJsString>,Option<Layout>> {678let(layout, offset) =iflatin1 {679 Layout::array::<u8>(str_len)680 }else{681 Layout::array::<u16>(str_len)682 }683 .and_then(|arr| Layout::new::<RawJsString>().extend(arr))684 .map(|(layout, offset)| (layout.pad_to_align(), offset))685 .map_err(|_|None)?;686687debug_assert_eq!(offset, DATA_OFFSET);688689#[allow(clippy::cast_ptr_alignment)]690// SAFETY:691 // The layout size of `RawJsString` is never zero, since it has to store692 // the length of the string and the reference count.693letinner =unsafe{ alloc(layout).cast::<RawJsString>() };694695// We need to verify that the pointer returned by `alloc` is not null, otherwise696 // we should abort, since an allocation error is pretty unrecoverable for us697 // right now.698letinner = NonNull::new(inner).ok_or(Some(layout))?;699700// SAFETY:701 // `NonNull` verified for us that the pointer returned by `alloc` is valid,702 // meaning we can write to its pointed memory.703unsafe{704// Write the first part, the `RawJsString`.705inner.as_ptr().write(RawJsString {706 tagged_len: TaggedLen::new(str_len, latin1),707 refcount: Cell::new(1),708 data: [0;0],709 });710 }711712debug_assert!({713letinner = inner.as_ptr();714// SAFETY:715 // - `inner` must be a valid pointer, since it comes from a `NonNull`,716 // meaning we can safely dereference it to `RawJsString`.717 // - `offset` should point us to the beginning of the array,718 // and since we requested an `RawJsString` layout with a trailing719 // `[u16; str_len]`, the memory of the array must be in the `usize`720 // range for the allocation to succeed.721unsafe{722 ptr::eq(723 inner.cast::<u8>().add(offset).cast(),724 (*inner).data.as_mut_ptr(),725 )726 }727 });728729Ok(inner)730 }731732/// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.733fnfrom_slice_skip_interning(string: JsStr<'_>) ->Self{734letcount = string.len();735letptr =Self::allocate_inner(count, string.is_latin1());736737// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.738letdata =unsafe{ (&rawmut(*ptr.as_ptr()).data).cast::<u8>() };739740// SAFETY:741 // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.742 // - `allocate_inner` must allocate at least `count` elements, which allows us to safely743 // write at least `count` elements.744 // - `allocate_inner` should already take care of the alignment of `ptr`, and `data` must be745 // aligned to be a valid slice.746 // - `allocate_inner` must return a valid pointer to newly allocated memory, meaning `ptr`747 // and `data` should never overlap.748unsafe{749// NOTE: The aligment is checked when we allocate the array.750#[allow(clippy::cast_ptr_alignment)]751matchstring.variant() {752 JsStrVariant::Latin1(s) => {753 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);754 }755 JsStrVariant::Utf16(s) => {756 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);757 }758 }759 }760Self{ ptr }761 }762763/// Creates a new [`JsString`] from `data`.764fnfrom_slice(string: JsStr<'_>) ->Self{765if letSome(s) = StaticJsStrings::get_string(&string) {766returns;767 }768Self::from_slice_skip_interning(string)769 }770771/// Gets the number of `JsString`s which point to this allocation.772#[inline]773 #[must_use]774pub fnrefcount(&self) ->Option<usize> {775ifself.is_static() {776returnNone;777 }778779// SAFETY:780 // `NonNull` and the constructions of `JsString` guarantee that `inner` is always valid.781letrc =unsafe{self.ptr.as_ref().refcount.get() };782Some(rc)783 }784}785786implCloneforJsString {787#[inline]788fnclone(&self) ->Self{789ifself.is_static() {790returnSelf{ ptr:self.ptr };791 }792793// SAFETY: `NonNull` and the constructions of `JsString` guarantee that `inner` is always valid.794letinner =unsafe{self.ptr.as_ref() };795796letstrong = inner.refcount.get().wrapping_add(1);797ifstrong ==0{798 abort()799 }800801 inner.refcount.set(strong);802803Self{ ptr:self.ptr }804 }805}806807implDefaultforJsString {808#[inline]809fndefault() ->Self{810 StaticJsStrings::EMPTY_STRING811 }812}813814implDropforJsString {815#[inline]816fndrop(&mutself) {817// See https://doc.rust-lang.org/src/alloc/sync.rs.html#1672 for details.818819ifself.is_static() {820return;821 }822823// SAFETY: `NonNull` and the constructions of `JsString` guarantees that `raw` is always valid.824letinner =unsafe{self.ptr.as_ref() };825826 inner.refcount.set(inner.refcount.get() -1);827ifinner.refcount.get() !=0{828return;829 }830831// SAFETY:832 // All the checks for the validity of the layout have already been made on `alloc_inner`,833 // so we can skip the unwrap.834letlayout =unsafe{835ifinner.is_latin1() {836 Layout::for_value(inner)837 .extend(Layout::array::<u8>(inner.len()).unwrap_unchecked())838 .unwrap_unchecked()839 .0840.pad_to_align()841 }else{842 Layout::for_value(inner)843 .extend(Layout::array::<u16>(inner.len()).unwrap_unchecked())844 .unwrap_unchecked()845 .0846.pad_to_align()847 }848 };849850// SAFETY:851 // If refcount is 0 and we call drop, that means this is the last `JsString` which852 // points to this memory allocation, so deallocating it is safe.853unsafe{854 dealloc(self.ptr.cast().as_ptr(), layout);855 }856 }857}858859implstd::fmt::DebugforJsString {860#[inline]861fnfmt(&self, f:&mutstd::fmt::Formatter<'_>) -> std::fmt::Result {862self.as_str().fmt(f)863 }864}865866implEqforJsString {}867868macro_rules! impl_from_number_for_js_string {869 ($($module: ident => $($ty:ty),+)+) => {870 $(871 $(872implFrom<$ty>forJsString {873#[inline]874fnfrom(value:$ty) ->Self{875 JsString::from_slice_skip_interning(JsStr::latin1(876$module::Buffer::new().format(value).as_bytes(),877 ))878 }879 }880 )+881 )+882 };883}884885impl_from_number_for_js_string!(886 itoa => i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, isize, usize887 ryu_js => f32, f64888);889890implFrom<&[u16]>forJsString {891#[inline]892fnfrom(s:&[u16]) ->Self{893 JsString::from_slice(JsStr::utf16(s))894 }895}896897implFrom<&str>forJsString {898#[inline]899fnfrom(s:&str) ->Self{900// TODO: Check for latin1 encoding901ifs.is_ascii() {902letjs_str = JsStr::latin1(s.as_bytes());903returnStaticJsStrings::get_string(&js_str)904 .unwrap_or_else(|| JsString::from_slice_skip_interning(js_str));905 }906lets = s.encode_utf16().collect::<Vec<_>>();907 JsString::from_slice_skip_interning(JsStr::utf16(&s[..]))908 }909}910911implFrom<JsStr<'_>>forJsString {912#[inline]913fnfrom(value: JsStr<'_>) ->Self{914 StaticJsStrings::get_string(&value)915 .unwrap_or_else(|| JsString::from_slice_skip_interning(value))916 }917}918919implFrom<&[JsString]>forJsString {920#[inline]921fnfrom(value:&[JsString]) ->Self{922Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])923 }924}925926impl<constN: usize> From<&[JsString; N]>forJsString {927#[inline]928fnfrom(value:&[JsString; N]) ->Self{929Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])930 }931}932933implFrom<String>forJsString {934#[inline]935fnfrom(s: String) ->Self{936Self::from(s.as_str())937 }938}939940impl<'a> From<Cow<'a, str>>forJsString {941#[inline]942fnfrom(s: Cow<'a, str>) ->Self{943matchs {944 Cow::Borrowed(s) => s.into(),945 Cow::Owned(s) => s.into(),946 }947 }948}949950impl<constN: usize> From<&[u16; N]>forJsString {951#[inline]952fnfrom(s:&[u16; N]) ->Self{953Self::from(&s[..])954 }955}956957implHashforJsString {958#[inline]959fnhash<H: Hasher>(&self, state:&mutH) {960self.as_str().hash(state);961 }962}963964implPartialOrdforJsStr<'_> {965#[inline]966fnpartial_cmp(&self, other:&Self) ->Option<std::cmp::Ordering> {967Some(self.cmp(other))968 }969}970971implOrdforJsString {972#[inline]973fncmp(&self, other:&Self) -> std::cmp::Ordering {974self.as_str().cmp(&other.as_str())975 }976}977978implPartialEqforJsString {979#[inline]980fneq(&self, other:&Self) -> bool {981self.as_str() == other.as_str()982 }983}984985implPartialEq<JsString>for[u16] {986#[inline]987fneq(&self, other:&JsString) -> bool {988ifself.len() != other.len() {989returnfalse;990 }991for(x, y)inself.iter().copied().zip(other.iter()) {992ifx != y {993returnfalse;994 }995 }996true997}998}9991000impl<constN: usize> PartialEq<JsString>for[u16; N] {1001#[inline]1002fneq(&self, other:&JsString) -> bool {1003self[..] ==*other1004 }1005}10061007implPartialEq<[u16]>forJsString {1008#[inline]1009fneq(&self, other:&[u16]) -> bool {1010 other ==self1011}1012}10131014impl<constN: usize> PartialEq<[u16; N]>forJsString {1015#[inline]1016fneq(&self, other:&[u16; N]) -> bool {1017*self== other[..]1018 }1019}10201021implPartialEq<str>forJsString {1022#[inline]1023fneq(&self, other:&str) -> bool {1024self.as_str() == other1025 }1026}10271028implPartialEq<&str>forJsString {1029#[inline]1030fneq(&self, other: &&str) -> bool {1031self.as_str() ==*other1032 }1033}10341035implPartialEq<JsString>forstr {1036#[inline]1037fneq(&self, other:&JsString) -> bool {1038 other ==self1039}1040}10411042implPartialEq<JsStr<'_>>forJsString {1043#[inline]1044fneq(&self, other:&JsStr<'_>) -> bool {1045self.as_str() ==*other1046 }1047}10481049implPartialEq<JsString>forJsStr<'_> {1050#[inline]1051fneq(&self, other:&JsString) -> bool {1052 other ==self1053}1054}10551056implPartialOrdforJsString {1057#[inline]1058fnpartial_cmp(&self, other:&Self) ->Option<std::cmp::Ordering> {1059Some(self.cmp(other))1060 }1061}10621063implFromStrforJsString {1064typeErr= Infallible;10651066#[inline]1067fnfrom_str(s:&str) ->Result<Self,Self::Err> {1068Ok(Self::from(s))1069 }1070}