impl LinkCheck {
    pub fn new(num_threads: usize, print_all: bool) -> LinkCheck {
        // TODO not sure if I really need this,
        // so I will remove it and see if this causes problems
        // unsafe {
        //     openssl_probe::init_openssl_env_vars();
        // }
        let pool = Mutex::new(ThreadPool::new(num_threads));
        LinkCheck {
            pool,
            urlhash: Arc::new(Mutex::new(FxHashMap::default())),
            print_all,
        }
    }

    pub fn check_urls(&self, fname: &str) {
        let print_all = self.print_all;
        if let Some(links) = get_links(fname) {
            for l in links {
                let urlhash = self.urlhash.clone();
                let fname_s = String::from(fname);
                // Each link is checked by a worker thread of the pool
                self.pool.lock().unwrap().execute(move || {
                    check_link(&l, &fname_s, &urlhash, print_all);
                });
            }
        }
    }
}
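// A minimal usage sketch, assuming a caller that owns a LinkCheck; the file
// names and the function name example_check_files are made up for illustration.
// Dropping the checker joins the thread pool (see the Drop impl below), so all
// queued link checks finish before it goes away.
#[allow(dead_code)]
fn example_check_files() {
    let checker = LinkCheck::new(4, false);
    checker.check_urls("README.md");
    checker.check_urls("doc/manual.tex");
    // `checker` goes out of scope here and waits for the outstanding checks
}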
impl Drop for LinkCheck {
    fn drop(&mut self) {
        //println!("Now dropping ...");
        // Wait until all queued link checks have finished
        let pool = self.pool.lock().unwrap();
        pool.join();
    }
}
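// The value type stored in the urlhash is only used via `hs.paths` and
// `hs.status` in check_link() below, together with the UrlStatus variants
// matched there. The following is an assumed sketch of how those types could
// look; the names UrlHashEntry, UrlHash and the Unknown variant are hypothetical.
enum UrlStatus {
    Unknown, // url recorded but not yet checked (assumed variant)
    UrlOk,
    UrlError(String),
}

struct UrlHashEntry {
    paths: FxHashSet<String>, // files in which the url was found
    status: UrlStatus,
}

type UrlHash = FxHashMap<String, UrlHashEntry>;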
// check_link runs in a worker thread; its signature follows the call in
// check_urls() above, and `UrlHash` is the assumed alias sketched above.
fn check_link(url: &str, fname: &str, urlhash: &Arc<Mutex<UrlHash>>, print_all: bool) {
    let url = String::from(url);
    let mut run_check_link = false;

    // It is very important to keep the lock for the urlhash
    // only for a short period of time.
    //
    // If we don't find the url in the urlhash then
    // we set `run_check_link` to `true` so that we will
    // check the url.
    {
        let f = String::from(fname);
        let mut urlhash = urlhash.lock().unwrap();
        if !urlhash.contains_key(&url) {
            // First time we see this url: remember the file it came from
            // and schedule the actual check for after the lock is released.
            let mut hs = FxHashSet::default();
            hs.insert(f);
            let url1 = url.clone();
            // `UrlHashEntry` and `UrlStatus::Unknown` are the assumed names
            // sketched above.
            urlhash.insert(
                url1,
                UrlHashEntry {
                    paths: hs,
                    status: UrlStatus::Unknown,
                },
            );
            run_check_link = true;
        }
    }

    // The network check runs outside the scope above so that the lock on the
    // urlhash is not held while the request is in flight.
    if run_check_link {
        match check_link_inner(&url, true) {
            UrlStatus::UrlOk => {
                let mut urlhash = urlhash.lock().unwrap();
                if let Some(hs) = urlhash.get_mut(&url) {
                    if print_all {
                        for p in hs.paths.iter() {
                            print_ok(super::ARGS.no_colors, &url, p);
                        }
                    }
                    hs.status = UrlStatus::UrlOk;
                }
            }
            UrlStatus::UrlError(e) => {
                let mut urlhash = urlhash.lock().unwrap();
                if let Some(hs) = urlhash.get_mut(&url) {
                    for p in hs.paths.iter() {
                        e0022!(p, e);
                    }
                    hs.status = UrlStatus::UrlError(e);
                }
            }
            _ => (),
        }
    }
}
fn get_links(fname: &str) -> Option<Vec<String>> {
    let fhdl = File::open(fname);
    match fhdl {
        Ok(mut f) => {
            // Read the whole file into a string and extract the links from it
            let mut buf = Vec::new();
            if f.read_to_end(&mut buf).is_ok() {
                if let Ok(s) = String::from_utf8(buf) {
                    return get_links_inner(&s);
                }
            }
            None
        }
        Err(_) => None,
    }
}

// Replaces HTML entities in a url by the characters they stand for
fn resolve_entities(url: &str) -> String {
    let v = vec![("&ouml;", "ö"), ("&uuml;", "ü")];
    let mut url_new = String::from(url);
    for (e, r) in v {
        url_new = url_new.replace(e, r);
    }
    url_new
}
// retrieves the links found in a string
fn get_links_inner(s: &str) -> Option<Vec<String>> {
    let mut finder = LinkFinder::new();
    finder.kinds(&[LinkKind::Url]);
    // finder.links() does the actual search for URLs
    let links: Vec<_> = finder.links(s).collect();
    let result: Vec<&str> = links.iter().map(|e| e.as_str()).collect();
    let mut links = vec![];
    for r in result {
        if !r.starts_with("http://") && !r.starts_with("https://") && !r.starts_with("ftp://") {
            continue;
        }
        // Workaround: strip trailing characters which should not be part of the URL
        let url = resolve_entities(r.trim_end_matches(['。', '`']));
        links.push(url);
    }
    if links.is_empty() {
        None
    } else {
        Some(links)
    }
}
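// A small illustration of the linkify crate's LinkFinder with a made-up input
// string (the function name and sample text are illustrative only): only the
// URL spans are reported, the surrounding text is ignored.
#[allow(dead_code)]
fn example_linkfinder() {
    let text = "See https://ctan.org and ftp://tug.org/ for details.";
    let mut finder = LinkFinder::new();
    finder.kinds(&[LinkKind::Url]);
    let found: Vec<&str> = finder.links(text).map(|l| l.as_str()).collect();
    // Expected to contain the two URLs, e.g. ["https://ctan.org", "ftp://tug.org/"]
    println!("{:?}", found);
}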
// Does the actual HTTP request for a single url. When `head` is true a HEAD
// request is tried first; if that fails the function calls itself again and
// retries with a GET request.
fn check_link_inner(l: &str, head: bool) -> UrlStatus {
    // NOTE: the request headers are not shown in this excerpt; an empty
    // header map is assumed here as a stand-in.
    let headers = reqwest::header::HeaderMap::new();

    // Follow redirects as usual, but stop if a redirect points to 127.0.0.1
    let default_policy = reqwest::redirect::Policy::default();
    let policy = reqwest::redirect::Policy::custom(move |attempt| {
        if attempt.url().host_str() == Some("127.0.0.1") {
            attempt.stop()
        } else {
            default_policy.redirect(attempt)
        }
    });

    let cb = reqwest::blocking::Client::builder()
        .gzip(true)
        .redirect(policy)
        .default_headers(headers)
        .timeout(Duration::from_secs(7))
        .build()
        .unwrap();

    // let url: Url =
    //     match l.parse() {
    //         Ok(url) => url,
    //         Err(e) => { println!("Error: {:?}", e); panic!("Scheiss"); }
    //     };

    let resp = if head { cb.head(l).send() } else { cb.get(l).send() };
    match resp {
        Ok(s) => {
            if s.status().is_informational()
                || s.status().is_success()
                || s.status().is_redirection()
            {
                return UrlStatus::UrlOk;
            }
            if head {
                // Some servers reject HEAD requests; retry once with GET
                check_link_inner(l, false)
            } else {
                let e = format!("{}: {}", l, s.status());
                UrlStatus::UrlError(e)
            }
        }
        Err(e) => {
            let e = format!("{}", e);
            UrlStatus::UrlError(e)
        }
    }
}
fn print_ok(no_colors: bool, url: &str, f: &str) {
    if no_colors {
        info!("✔ {} in {}", &url, f);
    } else {
        // println!("✔ {} in {}", &url, f);
        info!("{} {} in {}", "✔".bright_green().bold(), url, f);
    }
}