diff --git a/src/Init/Data/String/Defs.lean b/src/Init/Data/String/Defs.lean index b9d5fa5acb..f864e3d2c4 100644 --- a/src/Init/Data/String/Defs.lean +++ b/src/Init/Data/String/Defs.lean @@ -685,7 +685,20 @@ abbrev endValidPos (s : String) : s.Pos := s.endPos @[deprecated String.toByteArray (since := "2025-11-24")] -abbrev String.bytes (s : String) : ByteArray := +abbrev bytes (s : String) : ByteArray := s.toByteArray +/-- +Returns the length of the string `s`, assuming the string is comprised only of ASCII characters. + +This is implemented as a synonym for `s.utf8ByteSize`, which takes constant time. +-/ +@[inline] +def lengthAssumingAscii (s : String) : Nat := + s.utf8ByteSize + +@[simp] +theorem lengthAssumingAscii_eq {s : String} : s.lengthAssumingAscii = s.utf8ByteSize := + (rfl) + end String diff --git a/src/Init/System/Uri.lean b/src/Init/System/Uri.lean index 033252f293..5e7e5ae046 100644 --- a/src/Init/System/Uri.lean +++ b/src/Init/System/Uri.lean @@ -14,6 +14,7 @@ import Init.Omega import Init.System.Platform import Init.While import Init.Data.String.Length +import Init.Data.Iterators.Combinators.Take public section @@ -93,35 +94,42 @@ a single unicode code point and these will also be decoded correctly. -/ def unescapeUri (s: String) : String := UriEscape.decodeUri s +private def normalizeDriveLetter (uri : String) : String := + -- lower-case drive letters seem to be preferred in URIs + match (uri.chars.take 2).toList with + | [driveLetter, ':'] => if driveLetter.isUpper then uri.decapitalize else uri + | _ => uri + /-- Convert the given FilePath to a "file:///encodedpath" Uri. -/ def pathToUri (fname : System.FilePath) : String := Id.run do let mut uri := fname.normalize.toString if System.Platform.isWindows then - -- normalize drive letter - -- lower-case drive letters seem to be preferred in URIs - if uri.length >= 2 && uri.front.isUpper && String.Pos.Raw.get uri ⟨1⟩ == ':' then - uri := uri.decapitalize + uri := normalizeDriveLetter uri uri := uri.map (fun c => if c == '\\' then '/' else c) uri := uri.foldl (fun s c => s ++ UriEscape.uriEscapeAsciiChar c) "" let result := if uri.startsWith "/" then "file://" ++ uri else "file:///" ++ uri result +-- On Windows, the path "/c:/temp" needs to become "C:/temp" +private def normalizeDriveExpression (p : String.Slice) : String := + match (p.chars.take 3).toList with + | ['/', driveLetter, ':'] => + if driveLetter.isAlpha then + (p.drop 1).copy.capitalize + else p.copy + | _ => p.copy + /-- Convert the given uri to a FilePath stripping the 'file://' prefix, ignoring the optional host name. -/ -def fileUriToPath? (uri : String) : Option System.FilePath := Id.run do - if !uri.startsWith "file://" then - none - else - let mut p := (unescapeUri uri).drop "file://".length |>.copy - p := p.dropWhile (λ c => c != '/') |>.copy -- drop the hostname. +def fileUriToPath? (uri : String) : Option System.FilePath := + match (unescapeUri uri).dropPrefix? "file://" with + | none => none + | some p => + let p := p.dropWhile (fun c => c != '/') -- drop the hostname. if System.Platform.isWindows then - -- On Windows, the path "/c:/temp" needs to become "C:/temp" - if p.length >= 2 && - p.front == '/' && (String.Pos.Raw.get p ⟨1⟩).isAlpha && String.Pos.Raw.get p ⟨2⟩ == ':' then - -- see also `pathToUri` - p := String.Pos.Raw.modify (p.drop 1).copy 0 .toUpper - p := p.map (fun c => if c == '/' then '\\' else c) - some p + some ((normalizeDriveExpression p).map (fun c => if c == '/' then '\\' else c)) + else + some p.copy end Uri end System diff --git a/src/lake/Lake/CLI/Main.lean b/src/lake/Lake/CLI/Main.lean index e724e00340..6a30236c32 100644 --- a/src/lake/Lake/CLI/Main.lean +++ b/src/lake/Lake/CLI/Main.lean @@ -228,9 +228,9 @@ def validateRepo? (repo : String) : Option String := Id.run do return "invalid characters in repository name" match repo.split '/' |>.toStringList with | [owner, name] => - if owner.length > 39 then + if owner.lengthAssumingAscii > 39 then return "invalid repository name; owner must be at most 390 characters long" - if name.length > 100 then + if name.lengthAssumingAscii > 100 then return "invalid repository name; owner must be at most 100 characters long" | _ => return "invalid repository name; must contain exactly one '/'" return none @@ -271,7 +271,7 @@ def lakeLongOption : (opt : String) → CliM PUnit modifyThe LakeOptions ({· with scope? := some (.ofRepo repo)}) | "--platform" => do let platform ← takeOptArg "--platform" "cache platform" - if platform.length > 100 then + if platform.chars.length > 100 then error "invalid platform; platform is expected to be at most 100 characters long" modifyThe LakeOptions ({· with platform? := some <| .ofString platform}) | "--toolchain" => do diff --git a/src/lake/Lake/CLI/Shake.lean b/src/lake/Lake/CLI/Shake.lean index 0174245ca9..88672ba695 100644 --- a/src/lake/Lake/CLI/Shake.lean +++ b/src/lake/Lake/CLI/Shake.lean @@ -326,7 +326,7 @@ where addExplanation (j : ModuleIdx) (k : NeedsKind) (use def_ : Name) (deps : Explanations) : Explanations := if if let some (some (name', _)) := deps[(j, k)]? then - decide (use.toString.length < name'.toString.length) + decide (use.toString.chars.length < name'.toString.chars.length) else true then deps.insert (j, k) (use, def_) diff --git a/src/lake/Lake/Config/Cache.lean b/src/lake/Lake/Config/Cache.lean index 208bca7d47..1fe4afa5be 100644 --- a/src/lake/Lake/Config/Cache.lean +++ b/src/lake/Lake/Config/Cache.lean @@ -497,7 +497,7 @@ public def system : CachePlatform := ⟨System.Platform.target⟩ @[inline] public def ofString (s : String) : CachePlatform := ⟨s⟩ /-- Returns the length of the platform identifier in Unicode code points. -/ -public def length (self : CachePlatform) : Nat := self.raw.length +public def length (self : CachePlatform) : Nat := self.raw.chars.length /-- Returns a string representation of the platform identifier. -/ public protected def toString (self : CachePlatform) : String := @@ -529,7 +529,7 @@ public def ofString (s : String) : CacheToolchain := ⟨normalizeToolchain s⟩ @[inline] public def ofElanToolchain (s : String) : CacheToolchain := ⟨s⟩ /-- Returns the length of the toolchain identifier in Unicode code points. -/ -public def length (self : CacheToolchain) : Nat := self.raw.length +public def length (self : CacheToolchain) : Nat := self.raw.chars.length /-- Returns a string representation of the toolchain identifier. -/ public protected def toString (self : CacheToolchain) : String :=