Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster image compare option #529

Merged
merged 1 commit into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 73 additions & 5 deletions czkawka_core/src/similar_images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ pub struct SimilarImages {
delete_outdated_cache: bool,
exclude_images_with_same_size: bool,
use_reference_folders: bool,
fast_comparing: bool,
}

/// Info struct with helpful information about results
Expand Down Expand Up @@ -151,6 +152,7 @@ impl SimilarImages {
delete_outdated_cache: true,
exclude_images_with_same_size: false,
use_reference_folders: false,
fast_comparing: false,
}
}

Expand Down Expand Up @@ -179,6 +181,10 @@ impl SimilarImages {
self.image_filter = image_filter;
}

/// Enables or disables fast comparing mode.
///
/// Stores `fast_comparing` in the field of the same name; the stored value is
/// read later by the similarity search when it groups similar hashes.
pub fn set_fast_comparing(&mut self, fast_comparing: bool) {
self.fast_comparing = fast_comparing;
}

/// Returns the current value of the `stopped_search` flag.
// NOTE(review): presumably true when a search was interrupted before finishing — confirm where the flag is set.
pub fn get_stopped_search(&self) -> bool {
self.stopped_search
}
Expand Down Expand Up @@ -633,7 +639,10 @@ impl SimilarImages {
let progress_send = progress_sender.clone();
let progress_thread_run = progress_thread_run.clone();
let atomic_mode_counter = atomic_mode_counter.clone();
let all_images = similarity as usize * available_hashes.len();
let all_images = match self.fast_comparing {
false => similarity as usize * available_hashes.len(),
true => available_hashes.len(),
};
thread::spawn(move || loop {
progress_send
.unbounded_send(ProgressData {
Expand All @@ -653,7 +662,7 @@ impl SimilarImages {
};
//// PROGRESS THREAD END
if similarity >= 1 {
for current_similarity in 1..=similarity {
if self.fast_comparing {
this_time_check_hashes = available_hashes.clone();

if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
Expand All @@ -670,7 +679,7 @@ impl SimilarImages {
let vector_with_found_similar_hashes = self
.bktree
.find(&hash, similarity)
.filter(|(similarity, hash)| (*similarity == current_similarity) && !master_of_group.contains(*hash) && available_hashes.contains_key(*hash))
.filter(|(_similarity, hash)| !master_of_group.contains(*hash) && available_hashes.contains_key(*hash))
.collect::<Vec<_>>();

// Not found any hash with specific distance
Expand Down Expand Up @@ -701,15 +710,74 @@ impl SimilarImages {
}
}

vector_with_found_similar_hashes.iter().for_each(|(_similarity, other_hash)| {
vector_with_found_similar_hashes.iter().for_each(|(similarity, other_hash)| {
let mut vec_fe = available_hashes.remove(*other_hash).unwrap();
for fe in &mut vec_fe {
fe.similarity = Similarity::Similar(current_similarity)
fe.similarity = Similarity::Similar(*similarity)
}

collected_similar_images.get_mut(&hash).unwrap().append(&mut vec_fe);
});
}
} else {
for current_similarity in 1..=similarity {
this_time_check_hashes = available_hashes.clone();

if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// End thread which send info to gui
progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();
return false;
}

for (hash, vec_file_entry) in this_time_check_hashes.into_iter() {
atomic_mode_counter.fetch_add(1, Ordering::Relaxed);

// Finds hashes with specific distance to
let vector_with_found_similar_hashes = self
.bktree
.find(&hash, similarity)
.filter(|(similarity, hash)| (*similarity == current_similarity) && !master_of_group.contains(*hash) && available_hashes.contains_key(*hash))
.collect::<Vec<_>>();

// Not found any hash with specific distance
if vector_with_found_similar_hashes.is_empty() {
continue;
}

// Current checked hash isn't in any group of similarity, so we create one, because found similar images
if !master_of_group.contains(&hash) {
master_of_group.insert(hash.clone());
collected_similar_images.insert(hash.clone(), Vec::new());

let mut things: Vec<FileEntry> = vec_file_entry
.into_iter()
.map(|mut fe| {
fe.similarity = Similarity::Similar(0);
fe
})
.collect();
collected_similar_images.get_mut(&hash).unwrap().append(&mut things);

// This shouldn't be executed too much times, so it should be quite fast to check this
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// End thread which send info to gui
progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();
return false;
}
}

vector_with_found_similar_hashes.iter().for_each(|(_similarity, other_hash)| {
let mut vec_fe = available_hashes.remove(*other_hash).unwrap();
for fe in &mut vec_fe {
fe.similarity = Similarity::Similar(current_similarity)
}

collected_similar_images.get_mut(&hash).unwrap().append(&mut vec_fe);
});
}
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions czkawka_gui/src/connect_button_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pub fn connect_button_search(
let button_settings = gui_data.header.button_settings.clone();
let button_app_info = gui_data.header.button_app_info.clone();
let check_button_music_approximate_comparison = gui_data.main_notebook.check_button_music_approximate_comparison.clone();
let check_button_image_fast_compare = gui_data.main_notebook.check_button_image_fast_compare.clone();

buttons_search_clone.connect_clicked(move |_| {
let included_directories = get_path_buf_from_vector_of_strings(get_string_from_list_store(&tree_view_included_directories, ColumnsIncludedDirectory::Path as i32, None));
Expand Down Expand Up @@ -292,6 +293,8 @@ pub fn connect_button_search(

let delete_outdated_cache = check_button_settings_similar_images_delete_outdated_cache.is_active();

let fast_compare = check_button_image_fast_compare.is_active();

let futures_sender_similar_images = futures_sender_similar_images.clone();
// Find similar images
thread::spawn(move || {
Expand All @@ -312,6 +315,7 @@ pub fn connect_button_search(
sf.set_allowed_extensions(allowed_extensions);
sf.set_delete_outdated_cache(delete_outdated_cache);
sf.set_exclude_images_with_same_size(ignore_same_size);
sf.set_fast_comparing(fast_compare);
sf.find_similar_images(Some(&stop_receiver), Some(&futures_sender_similar_images));
let _ = glib_stop_sender.send(Message::SimilarImages(sf));
});
Expand Down
9 changes: 9 additions & 0 deletions czkawka_gui/src/gui_main_notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ pub struct GuiMainNotebook {
pub check_button_image_ignore_same_size: gtk::CheckButton,
pub check_button_video_ignore_same_size: gtk::CheckButton,

pub check_button_image_fast_compare: gtk::CheckButton,

pub label_image_similarity: gtk::Label,
pub label_image_similarity_max: gtk::Label,

Expand Down Expand Up @@ -206,6 +208,8 @@ impl GuiMainNotebook {
let scale_similarity_similar_images: gtk::Scale = builder.object("scale_similarity_similar_images").unwrap();
let scale_similarity_similar_videos: gtk::Scale = builder.object("scale_similarity_similar_videos").unwrap();

let check_button_image_fast_compare: gtk::CheckButton = builder.object("check_button_image_fast_compare").unwrap();

let combo_box_image_resize_algorithm: gtk::ComboBoxText = builder.object("combo_box_image_resize_algorithm").unwrap();
let combo_box_image_hash_algorithm: gtk::ComboBoxText = builder.object("combo_box_image_hash_algorithm").unwrap();
let combo_box_image_hash_size: gtk::ComboBoxText = builder.object("combo_box_image_hash_size").unwrap();
Expand Down Expand Up @@ -292,6 +296,7 @@ impl GuiMainNotebook {
combo_box_duplicate_hash_type,
combo_box_image_hash_size,
check_button_video_ignore_same_size,
check_button_image_fast_compare,
}
}

Expand Down Expand Up @@ -354,6 +359,10 @@ impl GuiMainNotebook {
self.check_button_image_ignore_same_size.set_label(&fl!("check_button_general_same_size"));
self.check_button_video_ignore_same_size.set_label(&fl!("check_button_general_same_size"));

self.check_button_image_fast_compare.set_label(&fl!("main_notebook_image_fast_compare"));
self.check_button_image_fast_compare
.set_tooltip_text(Some(&fl!("main_notebook_image_fast_compare_tooltip")));

{
let hash_size_index = self.combo_box_image_hash_size.active().unwrap() as usize;
let hash_size = IMAGES_HASH_SIZE_COMBO_BOX[hash_size_index];
Expand Down
15 changes: 15 additions & 0 deletions czkawka_gui/ui/main_window.glade
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,21 @@ Author: Rafał Mikrut
<property name="position">4</property>
</packing>
</child>
<child>
<object class="GtkCheckButton" id="check_button_image_fast_compare">
<property name="label" translatable="yes">Fast compare</property>
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="receives-default">False</property>
<property name="margin-start">7</property>
<property name="draw-indicator">True</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">5</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
Expand Down
10 changes: 9 additions & 1 deletion i18n/en/czkawka_gui.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,19 @@ image_hash_size_tooltip =
Default value for hash is 8 bytes, which allows to find very similar and different images. 16 and 32 hashes should be used only for nearly identical images. 64 bytes hash shouldn't be used, except situation where really small differences are needed to find

image_resize_filter_tooltip =
To compute hash of image, library must first resize it. Depend on choosen algorithm, resulted image will looks little different. The fastest algotithm to use, but also one which gives the worst results is Nearest.
To compute the hash of an image, the library must first resize it. Depending on the chosen algorithm, the resulting image will look a little different. The fastest algorithm, but also the one which gives the worst results, is Nearest.

image_hash_alg_tooltip =
Users can choose one from many algorithms of calculating hash. Each have both strong and weaker points and will give sometimes better and sometimes worse results for different images, so to choose the best one, manual testing is required.

main_notebook_image_fast_compare = Fast compare
main_notebook_image_fast_compare_tooltip =
Speeds up searching and comparing hashes.

In contrast to normal mode, where each hash is compared against the others x times (x being the similarity chosen by the user), this mode always performs exactly one comparison.

This option is recommended when comparing more than 10000 images with a similarity other than 0 (Very High).

main_notebook_duplicates = Duplicate Files
main_notebook_empty_directories = Empty Directories
main_notebook_big_files = Big Files
Expand Down