Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multithread support to similar image finder #98

Merged
merged 1 commit into from
Nov 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion czkawka_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repository = "https://github.com/qarmin/czkawka"
[dependencies]
humansize = "1"
blake3 = "0.3"
#rayon = "1"
rayon = "1"
crossbeam-channel = "0.4.4"


Expand Down
63 changes: 46 additions & 17 deletions czkawka_core/src/similar_images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crossbeam_channel::Receiver;
use humansize::{file_size_opts as options, FileSize};
use image::GenericImageView;
use img_hash::HasherConfig;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs;
use std::fs::{File, Metadata};
Expand Down Expand Up @@ -58,6 +59,7 @@ pub struct SimilarImages {
image_hashes: HashMap<Node, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
stopped_search: bool,
similarity: Similarity,
images_to_check: Vec<FileEntry>,
}

/// Info struct with helpful information about results
Expand Down Expand Up @@ -95,6 +97,7 @@ impl SimilarImages {
image_hashes: Default::default(),
stopped_search: false,
similarity: Similarity::High,
images_to_check: vec![],
}
}

Expand Down Expand Up @@ -231,16 +234,10 @@ impl SimilarImages {
continue 'dir;
}

let image = match image::open(&current_file_name) {
Ok(t) => t,
Err(_) => continue 'dir, // Something is wrong with image
};
let dimensions = image.dimensions();
// Creating new file entry
let fe: FileEntry = FileEntry {
path: current_file_name.clone(),
size: metadata.len(),
dimensions: format!("{}x{}", dimensions.0, dimensions.1),
dimensions: "".to_string(),
modified_date: match metadata.modified() {
Ok(t) => match t.duration_since(UNIX_EPOCH) {
Ok(d) => d.as_secs(),
Expand All @@ -257,15 +254,8 @@ impl SimilarImages {

similarity: Similarity::None,
};
let hasher = HasherConfig::with_bytes_type::<[u8; 8]>().to_hasher();

let hash = hasher.hash_image(&image);
let mut buf = [0u8; 8];
buf.copy_from_slice(&hash.as_bytes());

self.bktree.add(buf);
self.image_hashes.entry(buf).or_insert_with(Vec::<FileEntry>::new);
self.image_hashes.get_mut(&buf).unwrap().push(fe);
self.images_to_check.push(fe);

self.information.size_of_checked_images += metadata.len();
self.information.number_of_checked_files += 1;
Expand All @@ -286,6 +276,42 @@ impl SimilarImages {
fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool {
let hash_map_modification = SystemTime::now();

let vec_file_entry = self
.images_to_check
.par_iter()
.map(|file_entry| {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// This will not break
return None;
}
let mut file_entry = file_entry.clone();

let image = match image::open(file_entry.path.clone()) {
Ok(t) => t,
Err(_) => return Option::from((file_entry, [0u8; 8], false)), // Something is wrong with image
};
let dimensions = image.dimensions();

file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1);
let hasher = HasherConfig::with_bytes_type::<[u8; 8]>().to_hasher();

let hash = hasher.hash_image(&image);
let mut buf = [0u8; 8];
buf.copy_from_slice(&hash.as_bytes());

Option::from((file_entry, buf, true))
})
.while_some()
.filter(|file_entry| file_entry.2)
.map(|file_entry| (file_entry.0, file_entry.1))
.collect::<Vec<(_, _)>>();

for (file_entry, buf) in vec_file_entry {
self.bktree.add(buf);
self.image_hashes.entry(buf).or_insert_with(Vec::<FileEntry>::new);
self.image_hashes.get_mut(&buf).unwrap().push(file_entry.clone());
}

//let hash_map_modification = SystemTime::now();
let similarity: u64 = match self.similarity {
Similarity::VeryHigh => 0,
Expand Down Expand Up @@ -327,7 +353,7 @@ impl SimilarImages {

for (similarity, similar_hash) in vector_with_found_similar_hashes.iter() {
if *similarity == 0 && hash == *similar_hash {
// This was already readed before
// This was already read before
continue;
} else if hash == *similar_hash {
panic!("I'm not sure if same hash can have distance > 0");
Expand Down Expand Up @@ -355,7 +381,10 @@ impl SimilarImages {
hashes_to_check.remove(*similar_hash);
}
}
new_vector.push((*vector_of_similar_images).to_owned());
if vector_of_similar_images.len() > 1 {
// Not sure why this may happen
new_vector.push((*vector_of_similar_images).to_owned());
}
}

self.similar_vectors = new_vector;
Expand Down
98 changes: 91 additions & 7 deletions czkawka_gui/czkawka.glade
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,13 @@ Author: Rafał Mikrut
<property name="visible">True</property>
<property name="can_focus">False</property>
<child>
<object class="GtkRadioButton" id="radio_button_name">
<object class="GtkRadioButton" id="radio_button_duplicates_name">
<property name="label" translatable="yes">Name(very fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
Expand All @@ -760,13 +760,13 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_size">
<object class="GtkRadioButton" id="radio_button_duplicates_size">
<property name="label" translatable="yes">Size(very fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
Expand All @@ -775,13 +775,13 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hashmb">
<object class="GtkRadioButton" id="radio_button_duplicates_hashmb">
<property name="label" translatable="yes">HashMb(fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
Expand All @@ -790,7 +790,7 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hash">
<object class="GtkRadioButton" id="radio_button_duplicates_hash">
<property name="label" translatable="yes">Hash(slow but accurate)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
Expand Down Expand Up @@ -1066,6 +1066,90 @@ Author: Rafał Mikrut
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can_focus">False</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can_focus">False</property>
<property name="label" translatable="yes">Similarity level </property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_small">
<property name="label" translatable="yes">Small</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_medium">
<property name="label" translatable="yes">Medium</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">2</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_high">
<property name="label" translatable="yes">High</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">3</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_very_high">
<property name="label" translatable="yes">Very High</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">4</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkScrolledWindow" id="scrolled_window_similar_images_finder">
<property name="visible">True</property>
Expand Down
34 changes: 26 additions & 8 deletions czkawka_gui/src/connect_button_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
let entry_same_music_minimal_size = gui_data.entry_same_music_minimal_size.clone();
let entry_allowed_extensions = gui_data.entry_allowed_extensions.clone();
let buttons_names = gui_data.buttons_names.clone();
let radio_button_name = gui_data.radio_button_name.clone();
let radio_button_size = gui_data.radio_button_size.clone();
let radio_button_hashmb = gui_data.radio_button_hashmb.clone();
let radio_button_hash = gui_data.radio_button_hash.clone();
let radio_button_duplicates_name = gui_data.radio_button_duplicates_name.clone();
let radio_button_duplicates_size = gui_data.radio_button_duplicates_size.clone();
let radio_button_duplicates_hashmb = gui_data.radio_button_duplicates_hashmb.clone();
let radio_button_duplicates_hash = gui_data.radio_button_duplicates_hash.clone();
let radio_button_similar_images_small = gui_data.radio_button_similar_images_small.clone();
let radio_button_similar_images_medium = gui_data.radio_button_similar_images_medium.clone();
let radio_button_similar_images_high = gui_data.radio_button_similar_images_high.clone();
let radio_button_similar_images_very_high = gui_data.radio_button_similar_images_very_high.clone();
let entry_duplicate_minimal_size = gui_data.entry_duplicate_minimal_size.clone();
let stop_receiver = gui_data.stop_receiver.clone();
let entry_big_files_number = gui_data.entry_big_files_number.clone();
Expand Down Expand Up @@ -70,13 +74,13 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
get_list_store(&scrolled_window_duplicate_finder).clear();

let check_method;
if radio_button_name.get_active() {
if radio_button_duplicates_name.get_active() {
check_method = duplicate::CheckingMethod::Name;
} else if radio_button_size.get_active() {
} else if radio_button_duplicates_size.get_active() {
check_method = duplicate::CheckingMethod::Size;
} else if radio_button_hashmb.get_active() {
} else if radio_button_duplicates_hashmb.get_active() {
check_method = duplicate::CheckingMethod::HashMB;
} else if radio_button_hash.get_active() {
} else if radio_button_duplicates_hash.get_active() {
check_method = duplicate::CheckingMethod::Hash;
} else {
panic!("No radio button is pressed");
Expand Down Expand Up @@ -186,6 +190,19 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
Err(_) => 1024 * 16, // By default
};

let similarity;
if radio_button_similar_images_small.get_active() {
similarity = similar_images::Similarity::Small;
} else if radio_button_similar_images_medium.get_active() {
similarity = similar_images::Similarity::Medium;
} else if radio_button_similar_images_high.get_active() {
similarity = similar_images::Similarity::High;
} else if radio_button_similar_images_very_high.get_active() {
similarity = similar_images::Similarity::VeryHigh;
} else {
panic!("No radio button is pressed");
}

// Find similar images
thread::spawn(move || {
let mut sf = SimilarImages::new();
Expand All @@ -195,6 +212,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
sf.set_recursive_search(recursive_search);
sf.set_excluded_items(excluded_items);
sf.set_minimal_file_size(minimal_file_size);
sf.set_similarity(similarity);
sf.find_similar_images(Option::from(&receiver_stop));
let _ = sender.send(Message::SimilarImages(sf));
});
Expand Down
Loading