给定一个大型目录树,其中包含许多文件和子目录,必须完全遍历:
什么更有效:
深度优先搜索:递归FindFirst / Next,在遇到每个子目录时立即处理该子目录。
广度优先搜索:迭代式FindFirst / Next维护要处理的目录列表,并在遇到子目录时将子目录追加到此列表中,以供以后处理。
代码1:
深度优先搜索:
var
DirsWithJpg: TArray
procedure CheckDirectory(const _Dir: string);
var
DirBs: string;
sr: TSearchRec;
ContainsFiles: boolean;
ContainsSubdirs: boolean;
begin
ContainsFiles := False;
ContainsSubdirs := False;
DirBs := IncludeTrailingPathDelimiter(_Dir);
if FindFirst(DirBs + '*.*', faAnyFile, sr) = 0 then begin
try
repeat
if (sr.Name = '.') or (sr.Name = '..') then begin
// ignore
end else if (sr.Attr and faDirectory) <> 0 then begin
// directory
ContainsSubdirs := true;
// recursive Depth First Search
CheckDirectory(DirBs + sr.Name);
end else if sr.Attr and (faHidden or faSysFile or faSymLink) = 0 then begin
// regular file
if not ContainsFiles then begin
if SameText(ExtractFileExt(sr.Name), '.jpg') then begin
ContainsFiles := true;
end;
end;
end else begin
// ignore special files
end;
until FindNext(sr) <> 0;
finally
FindClose(sr);
end;
end;
if ContainsFiles then begin
if ContainsSubdirs then begin
m_Result.Lines.Add(Format('Directory %s contains files and subdirectories, ignoring it.',
[_Dir]));
end else begin
DirsWithJpg := DirsWithJpg + [_Dir];
end;
end;
end;
var
Stopwatch: TStopwatch;
begin
Stopwatch.Reset;
Stopwatch.Start;
CheckDirectory('\\server\share\dir');
Stopwatch.Stop;
m_Result.Lines.Add(Format('FindFirst/Next DBF: Found %d dirs in %.3f seconds',
[Length(DirsWithJpg), Stopwatch.Elapsed.TotalSeconds]));
end;
代码2:
广度优先搜索:
var
DirsWithJpg: TArray
DirsToSearch: TStringList;
procedure CheckDirectory(const _Dir: string);
var
DirBs: string;
sr: TSearchRec;
ContainsFiles: boolean;
ContainsSubdirs: boolean;
begin
ContainsFiles := False;
ContainsSubdirs := False;
DirBs := IncludeTrailingPathDelimiter(_Dir);
if FindFirst(DirBs + '*.*', faAnyFile, sr) = 0 then begin
try
repeat
if (sr.Name = '.') or (sr.Name = '..') then begin
// ignore
end else if (sr.Attr and faDirectory) <> 0 then begin
// directory
ContainsSubdirs := true;
// add to list for later processing
DirsToSearch.Add(DirBs + sr.Name);
end else if sr.Attr and (faHidden or faSysFile or faSymLink) = 0 then begin
// regular file
if not ContainsFiles then begin
if SameText(ExtractFileExt(sr.Name), '.jpg') then begin
ContainsFiles := true;
end;
end;
end else begin
// ignore special files
end;
until FindNext(sr) <> 0;
finally
FindClose(sr);
end;
end;
if ContainsFiles then begin
if ContainsSubdirs then begin
m_Result.Lines.Add(Format('Directory %s contains files and subdirectories, ignoring it.',
[_Dir]));
end else begin
DirsWithJpg := DirsWithJpg + [_Dir];
end;
end;
end;
var
Stopwatch: TStopwatch;
begin
Stopwatch.Reset;
Stopwatch.Start;
DirsToSearch := TStringList.Create;
try
DirsToSearch.Add('\\server\share\dir');
while DirsToSearch.count > 0 do begin
CheckDirectory(DirsToSearch[0]);
DirsToSearch.Delete(0);
end;
finally
FreeAndNil(DirsToSearch);
end;
Stopwatch.Stop;
m_Result.Lines.Add(Format('FindFirst/Next BFS: Found %d dirs in %.3f seconds',
[Length(DirsWithJpg), Stopwatch.Elapsed.TotalSeconds]));
end;
代码3:
var
DirsWithJpg: TArray
procedure CheckDirectory(const _Dir: string);
var
Files: TArray
Dirs: TArray
i: Integer;
begin
Files := TDirectory.GetFiles(_Dir, '*.jpg');
Dirs := TDirectory.GetDirectories(_Dir);
if Length(Files) > 0 then begin
if Length(Dirs) > 0 then begin
m_Result.Lines.Add(Format('Directory %s contains files and subdirectories, ignoring it.',
[_Dir]));
end else
DirsWithJpg := DirsWithJpg + [_Dir];
end else begin
for i := Low(Dirs) to High(Dirs) do
CheckDirectory(Dirs[i]);
end;
end;
var
Stopwatch: TStopwatch;
begin
Stopwatch.Reset;
Stopwatch.Start;
CheckDirectory('\\server\share\dir');
Stopwatch.Stop;
m_Result.Lines.Add(Format('DirGetFiles: Found %d dirs in %.3f seconds',
[Length(DirsWithJpg), Stopwatch.Elapsed.TotalSeconds]));
end;
代码4:
function FindFirstFileEx(lpFileName: LPWSTR; fInfoLevelId: TFindexInfoLevels;
lpFindFileData: Pointer; fSearchOp: TFindexSearchOps; lpSearchFilter: PWin32FindData;
dwAdditionalFlags: DWORD): THandle; stdcall;
external kernelbase Name 'FindFirstFileExW';
function FindNextFile(hFindFile: THandle; lpFindFileData: PWin32FindData): BOOL; stdcall;
external kernelbase Name 'FindNextFileW';
procedure TForm1.b_FindFirstExNextBFSClick(Sender: TObject);
var
DirsWithJpg: TArray
DirsToSearch: TStringList;
procedure CheckDirectory(const _Dir: string);
const
FIND_FIRST_EX_LARGE_FETCH = 2;
var
DirBs: string;
SearchHandle: THandle;
sr: TWin32FindData;
ContainsFiles: boolean;
ContainsSubdirs: boolean;
fn: string;
begin
ContainsFiles := False;
ContainsSubdirs := False;
DirBs := IncludeTrailingPathDelimiter(_Dir);
SearchHandle := FindFirstFileEx(PChar(DirBs + '*.*'), FindExInfoBasic, @sr, FindExSearchNameMatch,
nil, FIND_FIRST_EX_LARGE_FETCH);
if SearchHandle <> INVALID_HANDLE_VALUE then begin
try
repeat
fn := sr.cFileName;
if (fn = '.') or (fn = '..') then begin
// ignore
end else if (sr.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) <> 0 then begin
// directory
ContainsSubdirs := true;
// add to list for later processing
DirsToSearch.Add(DirBs + fn);
end else if sr.dwFileAttributes
and (FILE_ATTRIBUTE_HIDDEN or FILE_ATTRIBUTE_SYSTEM or FILE_ATTRIBUTE_REPARSE_POINT) = 0 then begin
// regular file
if not ContainsFiles then begin
if SameText(ExtractFileExt(fn), '.jpg') then begin
ContainsFiles := true;
end;
end;
end else begin
// ignore special files
end;
until not FindNextFile(SearchHandle, @sr);
finally
Winapi.Windows.FindClose(SearchHandle);
end;
end;
if ContainsFiles then begin
if ContainsSubdirs then begin
m_Result.Lines.Add(Format('Directory %s contains files and subdirectories, ignoring it.',
[_Dir]));
end else begin
DirsWithJpg := DirsWithJpg + [_Dir];
end;
end;
end;
var
Stopwatch: TStopwatch;
begin
Stopwatch.Reset;
Stopwatch.Start;
DirsToSearch := TStringList.Create;
try
DirsToSearch.Add('\\server\share\dir');
while DirsToSearch.count > 0 do begin
CheckDirectory(DirsToSearch[0]);
DirsToSearch.Delete(0);
end;
finally
FreeAndNil(DirsToSearch);
end;
Stopwatch.Stop;
m_Result.Lines.Add(Format('FindFirstExNext BFS: Found %d dirs in %.3f seconds',
[Length(DirsWithJpg), Stopwatch.Elapsed.TotalSeconds]));
end;
结论:
无论使用“广度优先”搜索还是“深度优先”搜索,性能都是相同的。
从SysUtils.FindFirst / FindNext切换到仅具有基本文件信息和标志FIND_FIRST_EX_LARGE_FETCH的Windows.FindFirstFileEx / FindNextFile给我带来了大约25%的性能提升,这还不错。
我发现无法从使用TDirectory中受益。我所做的唯一测试比FindFirst / Next性能差。
测试环境:
Windows 8.1客户端通过1 GBit以太网以最小的流量访问Samba服务器上的共享(这是周末)。该计算机是具有3.4 GHz,8核和16 GB RAM的相当快的Intel Xeon E3。
该服务器具有一个Intel Core I9-9940X处理器,该处理器具有16个内核和64 GB的RAM。共享存储在2 TB英特尔M2 NVMe SSD上。它正在运行Ubuntu 18.04.4 LTS和Samba 4.7.6-Ubuntu。
该测试程序是用Delphi 10.4编译的。使用Debug和Release配置进行构建之间没有显着差异。
目录树深4层,在前3层中仅包含几个目录,但在最深层中总共包含898个目录。这些目录中每个目录的最深层都包含数百个至数千个jpeg文件。
该代码的目的是在任何级别上查找包含至少一个jpeg文件但不包含子目录的所有目录。
所有测试以不同的顺序运行了几次,以防止任何缓存影响而扭曲结果。
Copyright © 2014 DelphiW.com 开发 源码 文档 技巧 All Rights Reserved
晋ICP备14006235号-8 晋公网安备 14108102000087号
执行时间: 0.043815135955811 seconds