★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★
➤微信公衆號:山青詠芝(shanqingyongzhi)
➤博客園地址:山青詠芝(https://www.cnblogs.com/strengthen/)
➤GitHub地址:https://github.com/strengthen/LeetCode
➤原文地址:http://www.javashuo.com/article/p-plapvfln-me.html
➤若是連接不是山青詠芝的博客園地址,則多是爬取做者的文章。
➤原文已修改更新!強烈建議點擊原文地址閱讀!支持做者!支持原創!
★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★html
Given a list of directory info including directory path, and all the files with contents in this directory, you need to find out all the groups of duplicate files in the file system in terms of their paths.git
A group of duplicate files consists of at least two files that have exactly the same content.github
A single directory info string in the input list has the following format:微信
"root/d1/d2/.../dm f1.txt(f1_content) f2.txt(f2_content) ... fn.txt(fn_content)"
app
It means there are n files (f1.txt
, f2.txt
... fn.txt
with content f1_content
, f2_content
... fn_content
, respectively) in directory root/d1/d2/.../dm
. Note that n >= 1 and m >= 0. If m = 0, it means the directory is just the root directory.優化
The output is a list of group of duplicate file paths. For each group, it contains all the file paths of the files that have the same content. A file path is a string that has the following format:ui
"directory_path/file_name.txt"
this
Example 1:spa
Input: ["root/a 1.txt(abcd) 2.txt(efgh)", "root/c 3.txt(abcd)", "root/c/d 4.txt(efgh)", "root 4.txt(efgh)"] Output: [["root/a/2.txt","root/c/d/4.txt","root/4.txt"],["root/a/1.txt","root/c/3.txt"]]
Note:code
Follow-up beyond contest:
給定一個目錄信息列表,包括目錄路徑,以及該目錄中的全部包含內容的文件,您須要找到文件系統中的全部重複文件組的路徑。一組重複的文件至少包括二個具備徹底相同內容的文件。
輸入列表中的單個目錄信息字符串的格式以下:
"root/d1/d2/.../dm f1.txt(f1_content) f2.txt(f2_content) ... fn.txt(fn_content)"
這意味着有 n 個文件(f1.txt
, f2.txt
... fn.txt
的內容分別是 f1_content
, f2_content
... fn_content
)在目錄 root/d1/d2/.../dm
下。注意:n>=1 且 m>=0。若是 m=0,則表示該目錄是根目錄。
該輸出是重複文件路徑組的列表。對於每一個組,它包含具備相同內容的文件的全部文件路徑。文件路徑是具備下列格式的字符串:
"directory_path/file_name.txt"
示例 1:
輸入: ["root/a 1.txt(abcd) 2.txt(efgh)", "root/c 3.txt(abcd)", "root/c/d 4.txt(efgh)", "root 4.txt(efgh)"] 輸出: [["root/a/2.txt","root/c/d/4.txt","root/4.txt"],["root/a/1.txt","root/c/3.txt"]]
注:
超越競賽的後續行動:
584ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var ctp = [String: [String]]() 4 for path in paths { 5 let comps = path.split(separator: " ") 6 let dir = comps[0] 7 for file in comps[1...] { 8 let (fullPath, contents) = analyzeFile(file, dir) 9 ctp[contents, default: []].append(fullPath) 10 } 11 } 12 return Array(ctp.values).filter { $0.count > 1 } 13 } 14 15 private func analyzeFile(_ file: String.SubSequence, _ dir: String.SubSequence) -> (String, String) { 16 let startIndex = file.index(of: "(")! 17 let endIndex = file.index(before: file.endIndex) 18 let contents = String(file[file.index(after: startIndex)..<endIndex]) 19 let path = String(file[file.startIndex..<startIndex]) 20 return ("\(dir)/\(path)", contents) 21 } 22 }
700ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var map = [String: [String]]() 4 for path in paths { 5 let contents = extractContent(path) 6 for content in contents { 7 map[content[1], default: [String]()].append(content[0]) 8 } 9 } 10 var result = [[String]]() 11 for key in map.keys { 12 if let tmpPaths = map[key] { 13 if tmpPaths.count > 1 { 14 result.append(tmpPaths) 15 } 16 } 17 } 18 return result 19 } 20 21 private func extractContent(_ str: String) -> [[String]] { 22 let arr = str.split(separator: " ") 23 let root = arr[0] 24 var result = [[String]]() 25 for i in 1 ..< arr.count { 26 let str = arr[i] 27 let left = str.firstIndex(of: "(")! 28 let right = str.lastIndex(of: ")")! 29 let filename = String(str[..<left]) 30 let content = String(str[left ... right]) 31 result.append(["\(root)/\(filename)", content]) 32 } 33 return result 34 } 35 }
908ms
1 class Solution { 2 3 typealias Content = String 4 typealias FilePath = String 5 typealias ExtractedContentPath = (content: Content, filepath: FilePath) 6 7 func findDuplicate(_ paths: [String]) -> [[String]] { 8 9 let contentFileTable: [Content: [FilePath]] = paths.lazy 10 .flatMap { self.extractedContentPaths($0) } 11 .reduce(into: [Content: [FilePath]]()){ (dict: inout [Content: [FilePath]], extractedContentPath: ExtractedContentPath) in 12 dict[extractedContentPath.content, default: [FilePath]()].append(extractedContentPath.filepath) 13 } 14 15 return contentFileTable.values.filter { $0.count > 1 } 16 17 } 18 19 private func extractedContentPaths(_ input: String) -> [ExtractedContentPath] { 20 let tokens = input.components(separatedBy: .whitespaces) 21 let directory = tokens.first! 22 return tokens.indices.dropFirst() 23 .lazy.map { extractedContentPath(from: tokens[$0], directory: directory) } 24 } 25 26 private func extractedContentPath(from fileAndContent: String, directory: String) -> ExtractedContentPath { 27 let tokens = fileAndContent.dropLast().components(separatedBy: "(") 28 return ExtractedContentPath(content: tokens.last!, filepath: "\(directory)/\(tokens.first!)") 29 } 30 }
992ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var groups = [String: Array<String>]() 4 5 for info in paths { 6 let list = parse(info: info) 7 8 for file in list { 9 groups[file.1, default: Array<String>()].append(file.0) 10 } 11 } 12 13 var result = [[String]]() 14 15 for group in Array(groups.values) { 16 if group.count > 1 { 17 result.append(group) 18 } 19 } 20 21 return result 22 } 23 24 func parse(info: String) -> [(String, String)] { 25 var components = info.components(separatedBy: " ") 26 let path = components.removeFirst() 27 28 var result = [(String, String)]() 29 30 let splitCharSet = CharacterSet(charactersIn: "()") 31 while !components.isEmpty { 32 33 let entry = components.removeFirst() 34 let parts = entry.components(separatedBy: splitCharSet) 35 36 let file = "\(path)/\(parts[0])" 37 let contents = parts[1] 38 39 result.append((file, contents)) 40 } 41 42 return result 43 } 44 }
1232ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var fileMapPaths = [String: [String]]() 4 paths.forEach { (str) in 5 let arrStrs = str.split(separator: " ") 6 let dir = arrStrs[0] 7 for i in 1..<arrStrs.count { 8 let fileInfo = arrStrs[i] 9 let subArrStr = fileInfo.split(separator: "(") 10 let md5 = String(subArrStr[1]) 11 let file = String(subArrStr[0].prefix(subArrStr[0].count)) 12 let filePath = dir + "/" + file 13 var mapPaths = fileMapPaths[md5] ?? [String]() 14 mapPaths.append(filePath) 15 fileMapPaths[md5] = mapPaths 16 } 17 } 18 let ans = fileMapPaths.values.filter { $0.count > 1} 19 return ans 20 } 21 }
1264ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 //create a dictionary with [content: [filepath]], output the value count which is equal and greater than 2 4 var contentToFiles = [String: [String]]() 5 for path in paths { 6 let params = path.split(separator: " ") 7 guard let dir = params.first else { 8 continue 9 } 10 for i in 1 ..< params.count { 11 let fileParams = params[i].split(separator: "(") 12 guard let fileName = fileParams.first, let fileContentWithExtraInfo = fileParams.last else { 13 continue 14 } 15 let fileContent = String(describing: fileContentWithExtraInfo.dropLast()) 16 let filePath = String(describing:dir) + "/" + String(describing: fileName ) 17 contentToFiles[fileContent] = contentToFiles[fileContent, default:[]] + [filePath] 18 } 19 } 20 return contentToFiles.values.filter{$0.count >= 2} 21 22 } 23 }
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var ans:[[String]] = [[String]]() 4 var map:[String:[String]] = [String:[String]]() 5 for path in paths 6 { 7 var temp:[String] = path.components(separatedBy:" ") 8 var root:String = temp[0] 9 for str in temp 10 { 11 var begin:Int = str.findLast("(") 12 var end:Int = str.findFirst(")") 13 if begin + 1 < end 14 { 15 var name:String = str.subString(begin+1,end) 16 var s:String = root + "/" + str.subStringTo(begin - 1) 17 if map[name] == nil 18 { 19 map[name] = [s] 20 } 21 else 22 { 23 map[name]!.append(s) 24 } 25 } 26 } 27 } 28 for val in map.values 29 { 30 if val.count > 1 31 { 32 ans.append(val) 33 } 34 } 35 return ans 36 } 37 } 38 39 //String擴展 40 extension String { 41 // 截取字符串:從起始處到index 42 // - Parameter index: 結束索引 43 // - Returns: 子字符串 44 func subStringTo(_ index: Int) -> String { 45 let theIndex = self.index(self.startIndex,offsetBy:min(self.count,index)) 46 return String(self[startIndex...theIndex]) 47 } 48 49 // 截取字符串:指定索引和字符數 50 // - begin: 開始截取處索引 51 // - count: 截取的字符數量 52 func subString(_ begin:Int,_ count:Int) -> String { 53 let start = self.index(self.startIndex, offsetBy: max(0, begin)) 54 let end = self.index(self.startIndex, offsetBy: min(self.count, begin + count)) 55 return String(self[start..<end]) 56 } 57 58 //從0索引處開始查找是否包含指定的字符串,返回Int類型的索引 59 //返回第一次出現的指定子字符串在此字符串中的索引 60 func findFirst(_ sub:String)->Int { 61 var pos = -1 62 if let range = range(of:sub, options: .literal ) { 63 if !range.isEmpty { 64 pos = self.distance(from:startIndex, to:range.lowerBound) 65 } 66 } 67 return pos 68 } 69 70 //從0索引處開始查找是否包含指定的字符串,返回Int類型的索引 71 //返回最後出現的指定子字符串在此字符串中的索引 72 func findLast(_ sub:String)->Int { 73 var pos = -1 74 if let range = range(of:sub, options: .backwards ) { 75 if !range.isEmpty { 76 pos = self.distance(from:startIndex, to:range.lowerBound) 77 } 78 } 79 return pos 80 } 81 }
1344ms
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var mapping = [String: [String]]() 4 for i in 0..<paths.count { 5 let arr = paths[i].components(separatedBy: " ") 6 let base = arr[0] + "/" 7 for j in 1..<arr.count { 8 let arrSplit = arr[j].components(separatedBy: "(") 9 mapping[arrSplit[1], default:[String]()].append(base + arrSplit[0]) 10 } 11 } 12 return Array(mapping.values).filter{$0.count > 1} 13 } 14 }