use anyhow::{Context, Result, anyhow, bail};
use log::*;
mod gparser;
#[macro_use]
mod messages;
use messages::{explains, explains_all};
mod linkcheck;
mod utils;
mod recode;
use recode::{wrong_line_endings2crlf, wrong_line_endings2lf};
mod filemagic;
use filemagic::LineEnding;
use linkcheck::LinkCheck;
use std::fmt;
use std::fmt::Display;
use std::str;
use utils::*;
use scoped_threadpool::Pool;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::os::unix::fs::MetadataExt;
use tempfile::Builder;
use once_cell::sync::Lazy;
use colored::*;
use regex::Regex;
use std::fs::File;
use std::fs::Metadata;
use std::io::BufReader;
use std::io::prelude::*;
use std::os::unix::fs::FileTypeExt;
use std::os::unix::fs::PermissionsExt;
use std::{fs, process};
use std::ffi::OsStr;
use std::path::Path;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use rustc_hash::{FxHashMap, FxHashSet};
use std::time::SystemTime;
use std::fmt::Arguments;
use std::sync::mpsc::{Sender, channel};
use clap::builder::PossibleValue;
use clap::{Command, CommandFactory, Parser, ValueEnum, ValueHint};
use clap_complete::{Generator, shells};
use clap_complete_nushell::Nushell;
// Hand-rolled so it can work even when `derive` feature is disabled
impl ValueEnum for Shell {
fn value_variants<'a>() -> &'a [Self] {
&[
Shell::Bash,
Shell::Elvish,
Shell::Fish,
Shell::PowerShell,
Shell::Zsh,
Shell::Nu,
]
}
fn to_possible_value(&self) -> Option<PossibleValue> {
// assumption: the value names follow clap_complete's usual lower-case shell names
Some(PossibleValue::new(match self {
Shell::Bash => "bash",
Shell::Elvish => "elvish",
Shell::Fish => "fish",
Shell::PowerShell => "powershell",
Shell::Zsh => "zsh",
Shell::Nu => "nu",
}))
}
}
fn get_config_file_name() -> Result<Option<String>> {
if let Some(config_file) = &ARGS.config_file {
return Ok(Some(config_file.to_string()));
}
let home_dir = match home::home_dir() {
Some(path) => path.display().to_string(),
None => bail!("Impossible to get your home dir!"),
};
let config_files = [".ctan/pkgcheck.yml", ".config/ctan/pkgcheck.yml"];
for f in config_files {
let config_file_abs_path = format!("{}/{}", home_dir, f);
if Path::new(&config_file_abs_path).exists() {
return Ok(Some(config_file_abs_path));
}
}
Ok(None)
}
fn read_yaml_config() -> Result<FxHashMap<String, String>> {
// maps a package name to the directory name expected in the TDS zip
// archive; entries from the config file override earlier entries
let mut pkg_replacements: FxHashMap<String, String> = FxHashMap::default();
if let Some(config_filename) = get_config_file_name()? {
i0008!(config_filename);
let data = fs::read_to_string(&config_filename)
.with_context(|| format!("Config file {} could not be read", &config_filename))?;
let path_exceptions = serde_yaml::from_str::<PathExceptions>(&data).with_context(|| {
format!(
"Problem with YAML content of config file {}",
&config_filename
)
})?;
for play in &path_exceptions.tds_path_exceptions {
// check if package name is already in pkg_replacements hash
let old_val = pkg_replacements.get(&play.pkg);
if let Some(ov) = old_val {
if ARGS.verbose {
if ov == &play.tpkg {
w0009!(play.pkg, play.tpkg);
} else {
i0009!(play.pkg, ov, play.tpkg);
}
}
}
pkg_replacements.insert(play.pkg.clone(), play.tpkg.clone());
}
};
Ok(pkg_replacements)
}
fn _get_devno(entry: &DirEntry) -> u64 {
// pass the Path directly; to_str().unwrap() would panic on non-UTF-8 paths
let meta = fs::metadata(entry.path());
match meta {
Ok(m) => m.dev(),
_ => 0,
}
}
#[derive(Parser, Debug, PartialEq)]
#[clap(author, version, about, long_about = None)]
#[command(arg_required_else_help(true))]
struct Args {
#[arg(short = 'I', long = "ignore-dupes", help = "Ignore dupes")]
ignore_dupes: bool,
#[arg(long = "ignore-same-named", help = "Ignore same-named files")]
ignore_same_named: bool,
#[arg(short = 'v', long = "verbose", help = "Verbose operation?")]
verbose: bool,
#[arg(short = 'L', long = "correct-le", help = "Correct line endings")]
correct_le: bool,
#[arg(short = 'C', long = "correct-perms", help = "Correct permissions")]
correct_perms: bool,
#[arg(long = "no-colors", help = "Don't display messages in color")]
no_colors: bool,
#[arg(long = "urlcheck", help = "Check URLs found in README files")]
urlcheck: bool,
#[arg(long = "warnings-no-errors", help = "Don't treat warnings as errors")]
warnings_no_errors: bool,
#[arg(short = 'T', long = "tds-zip", help = "TDS zip archive", group = "tds", value_hint = ValueHint::FilePath)]
tds_zip: Option<String>,
#[arg(
short = 'e',
long = "explain",
help = "Explain error or warning message",
group = "only_one"
)]
explain: Option<String>,
#[arg(
long = "explain-all",
help = "Explains all error or warning messages",
group = "only_one"
)]
explain_all: bool,
#[arg(long = "generate-completion", group = "only_one", value_enum)]
generator: Option<Shell>,
#[arg(
long = "show-temp-endings",
help = "Show file endings for temporary files",
group = "only_one"
)]
show_tmp_endings: bool,
#[arg(short = 'd', long = "package-dir", help = "Package directory", value_hint = ValueHint::DirPath)]
pkg_dir: Option<String>,
#[arg(long = "config-file", help = "Specify config file to use", value_hint = ValueHint::FilePath)]
config_file: Option<String>,
}
// In the past we took care to avoid visiting a single inode twice, which
// suppressed false positives caused by hardlinks.
// Now we want to know if there is a hardlink in the package directory
#[cfg(unix)]
fn check_inode(set: &mut FxHashMap<(u64, u64), Vec<String>>, filename: &str, meta: &Metadata) {
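// key: (device number, inode number); paths collected under the same
// key are hardlinks of the same file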
set.entry((get_devno(meta), meta.ino()))
.or_default()
.push(filename.to_string());
}
if let Some(generator) = ARGS.generator {
let mut cmd = Args::command();
eprintln!("Generating completion file for {:?}...", generator);
print_completions(generator, &mut cmd);
process::exit(0)
}
if ARGS.explain_all {
explains_all();
process::exit(0);
}
if ARGS.show_tmp_endings {
show_tmp_endings();
process::exit(0);
}
// read yaml config file if one is given explicitly or implicitly
let pkg_replace: FxHashMap<String, String> = read_yaml_config()?;
let pkg_dir = match &ARGS.pkg_dir {
None => {
return Err(anyhow!("Specify a directory to check (use option -d)"));
}
Some(d) => {
// make sure the given directory ends with a '/' (slash)
let ds: String = if d.ends_with('/') {
d.to_string()
} else {
format!("{}/", d)
};
exists_dir(&ds).with_context(|| format!("Specified package directory {}", &ds))?;
ds
}
};
let tds_zip = &ARGS.tds_zip;
// let's check that the specified TDS archive
// - exists
// - is a zip archive
if let Some(tds_zip) = tds_zip {
exists_file(tds_zip).with_context(|| format!("Specified TDS zip archive {}", &tds_zip))?;
let pkg_name = get_package_name_from_tds_archive_name(tds_zip)?;
let mut fmagic = filemagic::Filetype::new();
if fmagic
.analyze(tds_zip)
.with_context(|| format!("Specified TDS archive {}", &tds_zip))?
!= filemagic::Mimetype::Zip
{
bail!("TDS archive {} is not a zip archive", &tds_zip);
}
if let Some(hashes) = check_package(&pkg_dir, &Some(tds_zip))? {
check_tds_archive(tds_zip, &hashes, &pkg_replace, &pkg_name)?;
}
} else {
let _ = check_package(&pkg_dir, &None)?;
}
if ARGS.correct_perms || ARGS.correct_le {
process::exit(0);
}
if ERROR_OCCURRED.load(Ordering::Relaxed)
|| (WARNING_OCCURRED.load(Ordering::Relaxed) && !ARGS.warnings_no_errors)
{
process::exit(1);
}
process::exit(0);
}
fn print_duplicates(hashes: &DupHashes) {
let mut total_dupes = 0;
let mut total_files = 0;
let mut total_size = 0;
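// total_size counts only the wasted bytes: size * (number of copies - 1)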
for (k, paths) in hashes.iter() {
let (sz, _hash) = k;
total_files += paths.plen;
total_size += sz * (paths.plen - 1) as u64;
total_dupes += (paths.plen - 1) as u64;
info!("Size: {}", sz);
for p in &paths.dupes {
if let DPath::Both(p) = p {
let ps = p.as_path().to_str().unwrap();
info!(" >>> {}", ps);
}
}
}
if ARGS.verbose && total_dupes > 0 {
info!("Duplicate statistics");
info!(" Found {} duplicate files", total_files);
info!(" Size of duplicate files: {}", total_size);
}
}
fn get_filetype(entry: &DirEntry) -> FType {
match entry.metadata() {
Ok(mt) => {
let ft = mt.file_type();
if ft.is_symlink() {
return FType::Symlink;
}
if ft.is_dir() {
return FType::Directory;
}
if ft.is_block_device() {
return FType::BlockDevice;
}
if ft.is_char_device() {
return FType::CharDevice;
}
if ft.is_fifo() {
return FType::Fifo;
}
if ft.is_socket() {
return FType::Socket;
}
FType::Regular
}
Err(e) => FType::Error(format!("{}", e)),
}
}
//
// read the file into a u8 buffer
// then convert it into a string
//
fn check_generated_files(entry: &str, generated: &mut GeneratedHashMap) {
// unwrap() is ok here as we only call this function for files,
// specifically .ins or .dtx files
let entry_fname = filename(entry).unwrap().to_string();
// the name of the .ins resp. .dtx without extension
let entry_base = &entry_fname[0..entry_fname.len() - 4];
let fhdl = File::open(entry);
match fhdl {
Ok(mut f) => {
let mut buf = Vec::new();
match f.read_to_end(&mut buf) {
Ok(_bytes_read) => {
if let Some(found) =
gparser::parse_generate(&String::from_utf8_lossy(&buf))
{
for fname in found {
// If the filename in the `\file{}` statement contains `\jobname`,
// we replace `\jobname` with the name of the .dtx resp. .ins file
// (without extension) before we investigate further
let fname1 = fname.replace("\\jobname", entry_base);
// If the filename in the generate statement contains a path component,
// we ignore it so that a generated file gets reported even if it ends
// up in a different place in the package directory, which sometimes
// happens in uploaded packages
let Some(filename) = utils::filename(&fname1) else {
continue;
};
// As we request a README in the top level directory of
// a package, we ignore READMEs generated by an
// .ins or .dtx file.
// CAVEAT: if this happens in a subdirectory it could be an error!
if is_readme(filename) {
continue;
}
// Ignore generated pdf, html, and css files
if fname.ends_with(".pdf")
|| fname.ends_with(".html")
|| fname.ends_with(".css")
{
continue;
}
let mut lcnames: FxHashMap<PathBuf, Vec<(PathBuf, FileKind)>> = FxHashMap::default();
let dir_entry = Path::new(tds_zip);
let p = get_perms(dir_entry)?;
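// the TDS zip archive itself must be readable by everyone and not executable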
if !owner_has(p, 4) || !others_have(p, 4) || x_bit_set(p) {
e0024!(tds_zip, perms_to_string(p));
if ARGS.correct_perms {
i0005!(&tds_zip);
set_perms(tds_zip, 0o664)?;
}
};
let ut = Utils::new(utils::CheckType::Tds);
let tmp_dir = Builder::new()
.prefix("pkgcheck")
.tempdir()
.with_context(|| "creating tempdir")?;
let tmp_dir_str = tmp_dir.path().to_str().unwrap();
// +1 skips the '/' that follows the temporary directory prefix
let tmp_dir_offset = tmp_dir_str.len() + 1;
// unzip the TDS zip archive into a temporary directory
ut.unzip(tds_zip, tmp_dir_str)
// was E0033 error message
.with_context(|| format!("TDS zip archive {}", tds_zip))?;
// in order to compare the package files with the content of the
// tds zip archive we need to checksum the files in the tds zip
// archive.
let mut sizes: SizesHashMap = FxHashMap::default();
let mut pool = Pool::new(num_cpus::get() as u32 + 1);
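// pool size: number of CPUs + 1, since the thread feeding the pool
// spends most of its time on IO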
{
// Process a single file entry: the `sizes` hashmap collects, per file
// size, the paths of all files having that size. Hashing each file is
// later submitted as a job to the pool.
let mut process = |fsize, dir_entry: &DirEntry| {
let path = dir_entry.path().to_path_buf();
let sizeref = &mut sizes;
sizeref.entry(fsize).or_default().push(path);
};
let mut map_files_found = false;
let mut map_dvips_found = false;
// these top level directories are the directories found in the
// texmf-dist/ directory of a TeX Live installation
let tds_toplevel_dirs: FxHashSet<String> = [
"asymptote",
"bibtex",
"chktex",
"context",
"doc",
"dvipdfmx",
"dvips",
"fonts",
"hbf2gf",
"makeindex",
"metafont",
"metapost",
"mft",
"omega",
"pbibtex",
"psutils",
"scripts",
"source",
"tex",
"tex4ht",
"texconfig",
"texdoc",
"texdoctk",
"ttf2pk",
"web2c",
"xdvi",
"xindy",
]
.iter()
.map(|&s| s.to_string())
.collect();
// set to true if the TDS zip archive contains a top level directory doc/
let mut doc_found = false;
// we track the number of top level directories, which must be at least 2
let mut number_of_toplevel_dirs = 0;
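// used to detect map files installed under fonts/map/dvips/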
let re: Regex = Regex::new(r"fonts/map/dvips/").unwrap();
for dir_entry in WalkDir::new(tmp_dir_str).follow_links(false) {
match dir_entry {
Ok(dir_entry) => {
let dir_entry_str = match dir_entry.path().to_str() {
Some(d) => d,
None => {
e0031!(dir_entry.path().to_string_lossy());
continue;
}
};
// this is the file_name without the directory part
// unwrap() is ok here as we covered potential UTF-8 related errors
// above in the definition of dir_entry_str
let file_name = dir_entry.file_name().to_str().unwrap().to_string();
let meta = match dir_entry.metadata() {
Ok(meta) => meta,
Err(e) => {
e0027!(dir_entry.path().display(), e);
continue;
}
};
let ft = get_filetype(&dir_entry);
if let FType::Error(e) = ft {
e0023!(e);
continue;
}
// this is the path name without the temporary part
// from unpacking the TDS zip archive
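// at depth 0 the entry is the unpack directory itself, which has no
// trailing '/', so we start one byte earlier to keep the slice in bounds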
let dir_entry_display = if dir_entry.depth() == 0 {
&dir_entry_str[tmp_dir_offset - 1..]
} else {
&dir_entry_str[tmp_dir_offset..]
};
let filetype = match ft {
FType::Directory => FileKind::Directory,
FType::Regular => FileKind::File,
FType::Symlink => {
e0043!(dir_entry_display);
continue;
}
_ => panic!(
"Unexpected file type for {} in zip archive",
dir_entry_display
),
};
register_duplicate_filename(&mut lcnames, dir_entry_display, filetype);
ut.check_for_temporary_file(dir_entry_display);
// In the top level directory of a TDS zip archive
// ... no files are allowed
// ... only specific directories are allowed
if dir_entry.depth() == 1 {
if ft == FType::Regular {
e0034!(dir_entry_display);
continue;
}
if !tds_toplevel_dirs.contains(&file_name) {
e0020!(&file_name);
} else {
number_of_toplevel_dirs += 1;
if &file_name == "doc" {
doc_found = true;
}
}
continue;
}
if ft == FType::Directory {
ut.check_for_empty_directory(dir_entry_str, dir_entry_display);
ut.check_for_hidden_directory(&file_name, dir_entry_display);
ut.is_unwanted_directory(&file_name, dir_entry_str);
continue;
}
// The LaTeX team provides the file `.tex` (a file whose base name is
// empty) in order to make `\input\relax` work (explained by David Carlisle)
// Therefore, we don't call check_for_hidden_file() in this case
match (pkg_name, dir_entry_display) {
("latex-tools", "tex/latex/tools/.tex") => (),
("latex-tools-dev", "tex/latex-dev/tools/.tex") => (),
(_, _) => ut.check_for_hidden_file(&file_name, dir_entry_display),
};
let fsize = meta.len();
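// remember the path under its file size; the comparison with the
// package files happens later via (size, hash) pairs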
process(fsize, &dir_entry);
ut.check_filesize(fsize, dir_entry_display);
// if we encounter a .dtx or .ins file we check
// that it is in a subdirectory of either source/ or doc/
if (dir_entry_str.ends_with(".dtx") || dir_entry_str.ends_with(".ins"))
&& !(dir_entry_display.starts_with("source/")
|| dir_entry_display.starts_with("doc/"))
{
e0036!(dir_entry_display);
continue;
}
// unless the path contains a man page, it must contain the package name
if !dir_entry_str.contains("/man/") && !dir_entry_str.contains(pkg_name) {
if let Some(real_name) = pkg_replace.get(pkg_name) {
let pkg_name_s = format!("/{}/", real_name);
if !dir_entry_str.contains(&pkg_name_s) {
e0028!(pkg_name_s, dir_entry_display);
}
} else {
e0028!(pkg_name, dir_entry_display);
}
}
if !doc_found {
e0039!();
}
if number_of_toplevel_dirs < 2 {
e0040!();
}
if map_files_found && !map_dvips_found {
e0041!();
}
};
let mut tds_hashes: FxHashMap<(u64, Vec<u8>), Vec<PathBuf>> = FxHashMap::default();
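// one collector task receives (size, path, hash) triples over a channel
// while the pool's worker threads hash the files in parallel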
pool.scoped(|scope| {
let (tx, rx) = channel();
let hashref = &mut tds_hashes;
scope.execute(move || {
for (size, path, hash) in rx.iter() {
hashref.entry((size, hash)).or_default().push(path);
}
});
for size in sizes.keys() {
for p in &sizes[size] {
let txc = tx.clone();
scope.execute(move || {
hash_file(*size, p.to_path_buf(), &txc)
.unwrap_or_else(|_| panic!("error hashing file {}", p.display()))
});
}
}
});
// now check that each package file is contained in the TDS zip archive
for (k, paths) in hashes.iter() {
if !tds_hashes.contains_key(k) {
let p = &paths.dupes[0];
e0026!(p);
}
}
print_casefolding_tds(&lcnames);
Ok(())
}
// Very important:
// The permission bits we get back for a file or directory include the
// file type bits, e.g. S_IFREG 0100000 (`regular file`), which are
// defined in `/usr/include/linux/stat.h`
//
// This means that, e.g., instead of 0o644 we have to use 0o100644
//
fn check_and_correct_perms4(dir_entry: &str, p: u32) -> Result<()> {
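// no bits outside rw-rw-rw- (0o666) may be set, and at least the bits
// of rw-r--r-- (0o644) must be set; 0o100000 is the S_IFREG bit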
if p & 0o100666 != p || p | 0o100644 != p {
e0002!(dir_entry, perms_to_string(p));
if ARGS.correct_perms {
i0005!(&dir_entry);
set_perms(dir_entry, 0o644)?;
}
}
Ok(())
}
fn check_and_correct_perms5(dir_entry: &str, p: u32) -> Result<()> {
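// analogous check: no bits outside rwxrwxrwx (0o777) set, and at least
// the bits of rwxr-xr-x (0o755) set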
if p & 0o100777 != p || p | 0o100755 != p {
e0002!(dir_entry, perms_to_string(p));
if ARGS.correct_perms {
i0005!(&dir_entry);
set_perms(dir_entry, 0o755)?;
}
}
Ok(())
}
// Sets permissions for a file or directory
// Sample invocation: set_perms("somefile", 0o644);
fn set_perms(entry: &str, p: u32) -> Result<()> {
let f = File::open(entry)?;
let attr = f.metadata()?;
let mut perms = attr.permissions();
let ps = &format!("{:o}", perms.mode());
perms.set_mode(p);
let ps1 = &format!("{:o}", p);
f.set_permissions(perms)?;
info!("mode of '{}' changed from {} to {} ", entry, ps, ps1);
let mut doublenames: FxHashMap<PathBuf, Vec<PathBuf>> = FxHashMap::default();
let mut inodes = FxHashMap::default();
let ut = Utils::new(utils::CheckType::Package);
i0002!(root);
// This hash contains all package file names.
//
// PathBuf: the full path starting at the directory specified at the command line
// Metadata: the meta data of the file
// String: the file name without any directory part
// ReadmeKind: is it a certain README, file or symlink?
// A special case of a README file is a file which has a different name but
// is pointed to by a symlink. Example: README --> README.rst
let mut file_names: FileNamesHashMap = FxHashMap::default();
let mut readme_found = false;
let root_absolute = PathBuf::from(root)
.canonicalize()
.unwrap()
.to_string_lossy()
.to_string();
for dir_entry in WalkDir::new(root).follow_links(false) {
match dir_entry {
Ok(dir_entry) => {
let dir_entry_str = match dir_entry.path().to_str() {
Some(d) => d,
None => {
e0031!(dir_entry.path().to_string_lossy());
continue;
}
};
let meta = match dir_entry.metadata() {
Ok(meta) => meta,
Err(e) => {
e0023!(e);
continue;
}
};
check_inode(&mut inodes, dir_entry_str, &meta);
// this is the file_name without the directory part
// unwrap() is ok here as we covered potential UTF-8 related errors
// above in the definition of dir_entry_str
let file_name = dir_entry.file_name().to_str().unwrap().to_string();
let mtime = meta.modified().unwrap();
if is_future_mtime(*NOW, mtime) {
let diff = mtime.duration_since(*NOW).unwrap();
w0011!(&file_name, diff.as_secs(), &utils::format_duration(&diff));
}
// we check for weird stuff like socket files and so on
let ft = get_filetype(&dir_entry);
if found_unwanted_filetype(dir_entry_str, &ft) {
continue;
}
// 2. dealing with directories
if ft == FType::Directory {
if filename(dir_entry_str).is_some() {
register_duplicate_filename(
&mut lcnames,
dir_entry_str,
FileKind::Directory,
);
}
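// directories must be readable and traversable (r-x) by everyone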
if !owner_has(p, 5) || !others_have(p, 5) {
e0011!(&dir_entry_str, perms_to_string(p));
if ARGS.correct_perms {
i0005!(&dir_entry_str);
set_perms(dir_entry_str, 0o775)?;
}
}
if is_readme(&file_name) {
// We want to deal with README files only if they are
// in the root directory of the package.
let f = format!(
"{}{}{}",
root,
// we have to pay attention to whether `root` already ends with '/'
if root.ends_with('/') { "" } else { "/" },
&file_name
);
let lc = LinkCheck::new(4, false);
let mut detective = filemagic::Filetype::new();
let mut sizes: SizesHashMap = FxHashMap::default();
let mut generated: GeneratedHashMap = FxHashMap::default();
// Process a single file entry: the `sizes` hashmap collects, per file
// size, the paths of all files having that size. Only files whose size
// occurs more than once need to be hashed, unless we also have to
// compare against a TDS zip archive. Hashing each file is submitted as
// a job to the pool.
let mut process = |fsize, path: &PathBuf| {
let sizeref = &mut sizes;
let path = path.clone();
sizeref.entry(fsize).or_default().push(path);
};
for (path, (meta, _file_name, is_readme)) in file_names.iter() {
let dir_entry_str = match path.to_str() {
Some(d) => d,
None => {
e0031!(&path.to_string_lossy());
continue;
}
};
let fsize = meta.len();
ut.check_filesize(fsize, dir_entry_str);
let p = get_perms(path)?;
// we ignore errors from filetype recognition
let Ok(ft) = detective.analyze(dir_entry_str) else {
continue;
};
if ReadmeKind::No != *is_readme {
if !check_readme(dir_entry_str, is_readme, &ft) {
continue;
}
if ARGS.urlcheck {
lc.check_urls(dir_entry_str);
}
}
match ft {
filemagic::Mimetype::Text(_) => {
check_and_correct_perms4(dir_entry_str, p)?;
let fext = get_extension_from_filename(dir_entry_str);
if fext == Some("ins") || fext == Some("dtx") {
check_generated_files(dir_entry_str, &mut generated);
}
match fext {
// deal with Windows files
Some("bat") | Some("cmd") | Some("nsh") | Some("reg") => match ft {
filemagic::Mimetype::Text(LineEnding::Crlf) => (),
filemagic::Mimetype::Text(LineEnding::Cr) => {
e0037!(&dir_entry_str);
if ARGS.correct_le {
make_crlf(dir_entry_str);
}
}
filemagic::Mimetype::Text(LineEnding::Mixed(0, 0, 0)) => (),
filemagic::Mimetype::Text(LineEnding::Mixed(cr, lf, crlf)) => {
e0038!(&dir_entry_str, cr, lf, crlf);
if ARGS.correct_le {
fix_inconsistent_le(dir_entry_str);
}
}
filemagic::Mimetype::Text(LineEnding::Lf) => {
w0008!(&dir_entry_str);
}
fmm => error!("Should not occur: {} has {:?}", dir_entry_str, fmm),
},
Some(_) | None => match ft {
filemagic::Mimetype::Text(LineEnding::Crlf) => {
e0012!(&dir_entry_str);
if ARGS.correct_le {
fix_inconsistent_le(dir_entry_str);
}
}
filemagic::Mimetype::Text(LineEnding::Cr) => {
e0037!(&dir_entry_str);
if ARGS.correct_le {
fix_inconsistent_le(dir_entry_str);
}
}
filemagic::Mimetype::Text(LineEnding::Mixed(0, 0, 0)) => (),
filemagic::Mimetype::Text(LineEnding::Mixed(cr, lf, crlf)) => {
e0038!(&dir_entry_str, cr, lf, crlf);
if ARGS.correct_le {
fix_inconsistent_le(dir_entry_str);
}
}
filemagic::Mimetype::Text(LineEnding::Lf) => (),
fmm => error!("Should not occur: {} has {:?}", dir_entry_str, fmm),
},
}
}
if !(ARGS.ignore_dupes && tds_zip.is_none()) {
process(fsize, path);
}
}
print_casefolding(&lcnames);
print_generated(&doublenames, &generated);
print_hardlinks(&inodes);
if !ARGS.ignore_same_named {
print_doublenames(&doublenames);
}
if ARGS.ignore_dupes && tds_zip.is_none() {
return Ok(None);
}
// Set up thread pool for the task to hash a file. Number of CPUs + 1 has been
// found to be a good pool size, likely since the walker thread should be
// doing mostly IO.
let mut pool = Pool::new(num_cpus::get() as u32 + 1);
let mut hashes: FxHashMap<(u64, Vec<u8>), DupPath> = FxHashMap::default();
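// files agreeing in (size, content hash) are considered duplicates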
pool.scoped(|scope| {
let (tx, rx) = channel();
let hashref = &mut hashes;
scope.execute(move || {
for (size, path, hash) in rx.iter() {
hashref.entry((size, hash)).or_default().push(path);
}
});
for size in sizes.keys() {
let paths = &sizes[size];
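// a unique file size cannot yield a duplicate; but when a TDS zip
// archive is given we need the hash of every file for the comparison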
if paths.len() == 1 && tds_zip.is_none() {
continue;
}
for p in paths {
let txc = tx.clone();
scope.execute(move || {
hash_file(*size, p.to_path_buf(), &txc)
.unwrap_or_else(|_| panic!("error hashing file {}", p.display()))
})
}
}
});
if !ARGS.ignore_dupes {
print_duplicates(&hashes);
}
Ok(Some(hashes))
}
fn print_hardlinks(hashes: &FxHashMap<(u64, u64), Vec<String>>) {
for ((_devid, inode), eles) in hashes.iter() {
if eles.len() > 1 {
w0010!(inode);
for hfile in eles.iter() {
info!(" >>> {}", &hfile);
}
}
}
}
fn print_casefolding_tds(hashes: &FxHashMap<PathBuf, Vec<(PathBuf, FileKind)>>) {
for (k, eles) in hashes.iter() {
// println!("pcf_tds: {:?}, {:?}", k, &eles);
if eles.len() == 1 {
continue;
}
e0042!(k.display());
for (p, ty) in eles {
info!(" >>> {} ({})", p.display(), ty);
}
}
}
fn print_casefolding(hashes: &FxHashMap<PathBuf, Vec<(PathBuf, FileKind)>>) {
for (k, eles) in hashes.iter() {
//println!("pcf: {:?}, {:?}", k, &eles);
if eles.len() == 1 {
continue;
}
e0025!(k.display());
for (p, ty) in eles {
info!(" >>> {} ({})", p.display(), ty);
}
}
}
fn print_generated(doublenames: &FxHashMap<PathBuf, Vec<PathBuf>>, generated: &GeneratedHashMap) {
// `k` is generated by `generator`
for (k, generator) in generated.iter() {
let path = PathBuf::from(k);
if doublenames.contains_key(&path) {
if k.ends_with(".ins") || k.ends_with(".pdf") {
//println!("key {}, gen {}", k, gen);
continue;
}
let v = &doublenames[&path];
for fname in v {
e0019!(fname.to_str().unwrap(), generator.as_str());
}
}
}
}