Skip to content

Commit 110d601

Browse files
authored
Similar images improvements: (#97)
Headers without data. By default, only checking for High similarity (faster search).
1 parent 5d4f4db commit 110d601

10 files changed

+162
-291
lines changed

czkawka_cli/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use czkawka_core::{
1111
empty_files::{self, EmptyFiles},
1212
empty_folder::EmptyFolder,
1313
same_music::SameMusic,
14-
similar_files::SimilarImages,
14+
similar_images::SimilarImages,
1515
temporary::{self, Temporary},
1616
zeroed::{self, ZeroedFiles},
1717
};

czkawka_core/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub mod common_items;
1414
pub mod common_messages;
1515
pub mod common_traits;
1616
pub mod same_music;
17-
pub mod similar_files;
17+
pub mod similar_images;
1818
pub mod zeroed;
1919

2020
pub const CZKAWKA_VERSION: &str = env!("CARGO_PKG_VERSION");

czkawka_core/src/similar_files.rs czkawka_core/src/similar_images.rs

+87-53
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,6 @@ pub struct FileEntry {
3232
pub modified_date: u64,
3333
pub similarity: Similarity,
3434
}
35-
#[derive(Clone)]
36-
pub struct StructSimilar {
37-
pub base_image: FileEntry,
38-
pub similar_images: Vec<FileEntry>,
39-
}
4035

4136
/// Type to store for each entry in the similarity BK-tree.
4237
type Node = [u8; 8];
@@ -57,11 +52,12 @@ pub struct SimilarImages {
5752
directories: Directories,
5853
excluded_items: ExcludedItems,
5954
bktree: BKTree<Node, Hamming>,
60-
similar_vectors: Vec<StructSimilar>,
55+
similar_vectors: Vec<Vec<FileEntry>>,
6156
recursive_search: bool,
6257
minimal_file_size: u64,
6358
image_hashes: HashMap<Node, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
6459
stopped_search: bool,
60+
similarity: Similarity,
6561
}
6662

6763
/// Info struct with helpful information about results
@@ -98,6 +94,7 @@ impl SimilarImages {
9894
minimal_file_size: 1024 * 16, // 16 KB should be enough to exclude too small images from search
9995
image_hashes: Default::default(),
10096
stopped_search: false,
97+
similarity: Similarity::High,
10198
}
10299
}
103100

@@ -109,7 +106,7 @@ impl SimilarImages {
109106
&self.text_messages
110107
}
111108

112-
pub const fn get_similar_images(&self) -> &Vec<StructSimilar> {
109+
pub const fn get_similar_images(&self) -> &Vec<Vec<FileEntry>> {
113110
&self.similar_vectors
114111
}
115112

@@ -127,6 +124,9 @@ impl SimilarImages {
127124
t => t,
128125
};
129126
}
127+
pub fn set_similarity(&mut self, similarity: Similarity) {
128+
self.similarity = similarity;
129+
}
130130

131131
/// Public function used by CLI to search for similar images
132132
pub fn find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>) {
@@ -135,6 +135,10 @@ impl SimilarImages {
135135
self.stopped_search = true;
136136
return;
137137
}
138+
if !self.sort_images(stop_receiver) {
139+
self.stopped_search = true;
140+
return;
141+
}
138142
// if self.delete_folders {
139143
// self.delete_empty_folders();
140144
// }
@@ -275,37 +279,51 @@ impl SimilarImages {
275279
}
276280
}
277281
}
282+
Common::print_time(start_time, SystemTime::now(), "check_for_similar_images".to_string());
283+
true
284+
}
278285

286+
fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool {
279287
let hash_map_modification = SystemTime::now();
280288

281-
let mut new_vector: Vec<StructSimilar> = Vec::new();
289+
//let hash_map_modification = SystemTime::now();
290+
let similarity: u64 = match self.similarity {
291+
Similarity::VeryHigh => 0,
292+
Similarity::High => 1,
293+
Similarity::Medium => 2,
294+
Similarity::Small => 3,
295+
_ => panic!("0-3 similarity levels are allowed, check if not added more."),
296+
};
297+
298+
// TODO
299+
// Currently, if A is similar to B with VeryHigh and to C with Medium,
300+
// and D is similar to C with High,
301+
// and Similarity is set to Medium (or lower),
302+
// and A is checked before D,
303+
// then C is shown in the group of A, not in the group of D.
304+
305+
let mut new_vector: Vec<Vec<FileEntry>> = Vec::new();
306+
let mut hashes_to_check = self.image_hashes.clone();
282307
for (hash, vec_file_entry) in &self.image_hashes {
283308
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
284309
return false;
285310
}
286-
let vector_with_found_similar_hashes = self.bktree.find(hash, 3).collect::<Vec<_>>();
311+
let vector_with_found_similar_hashes = self.bktree.find(hash, similarity).collect::<Vec<_>>();
287312
if vector_with_found_similar_hashes.len() == 1 && vec_file_entry.len() == 1 {
288-
// Exists only 1 unique picture, so there is no need to use it
313+
// This picture has no similar pictures, so there is nothing to do
289314
continue;
290315
}
291316

292-
let mut vec_similarity_struct: Vec<StructSimilar> = Vec::new();
293-
294-
for file_entry in vec_file_entry.iter() {
295-
let similar_struct = StructSimilar {
296-
base_image: file_entry.clone(),
297-
similar_images: vec_file_entry
298-
.iter()
299-
.filter(|x| x.path != file_entry.path)
300-
.map(|x| {
301-
let mut y = x.clone();
302-
y.similarity = Similarity::VeryHigh;
303-
y
304-
})
305-
.collect::<Vec<FileEntry>>(),
306-
};
307-
vec_similarity_struct.push(similar_struct);
308-
}
317+
let mut vector_of_similar_images: Vec<FileEntry> = vec_file_entry
318+
.iter()
319+
.map(|fe| FileEntry {
320+
path: fe.path.clone(),
321+
size: fe.size,
322+
dimensions: fe.dimensions.clone(),
323+
modified_date: fe.modified_date,
324+
similarity: Similarity::VeryHigh,
325+
})
326+
.collect();
309327

310328
for (similarity, similar_hash) in vector_with_found_similar_hashes.iter() {
311329
if *similarity == 0 && hash == *similar_hash {
@@ -315,31 +333,34 @@ impl SimilarImages {
315333
panic!("I'm not sure if same hash can have distance > 0");
316334
}
317335

318-
for file_entry in self.image_hashes.get(*similar_hash).unwrap() {
319-
let mut file_entry = file_entry.clone();
320-
file_entry.similarity = match similarity {
321-
0 => Similarity::VeryHigh,
322-
1 => Similarity::High,
323-
2 => Similarity::Medium,
324-
3 => Similarity::Small,
325-
_ => panic!("0-3 similarity levels are allowed, check if not added more."),
326-
};
327-
for similarity_struct in vec_similarity_struct.iter_mut() {
328-
similarity_struct.similar_images.push(file_entry.clone());
329-
}
336+
if let Some(vec_file_entry) = hashes_to_check.get(*similar_hash) {
337+
vector_of_similar_images.append(
338+
&mut (vec_file_entry
339+
.iter()
340+
.map(|fe| FileEntry {
341+
path: fe.path.clone(),
342+
size: fe.size,
343+
dimensions: fe.dimensions.clone(),
344+
modified_date: fe.modified_date,
345+
similarity: match similarity {
346+
0 => Similarity::VeryHigh,
347+
1 => Similarity::High,
348+
2 => Similarity::Medium,
349+
3 => Similarity::Small,
350+
_ => panic!("0-3 similarity levels are allowed, check if not added more."),
351+
},
352+
})
353+
.collect::<Vec<_>>()),
354+
);
355+
hashes_to_check.remove(*similar_hash);
330356
}
331357
}
332-
for similarity_struct in vec_similarity_struct.iter_mut() {
333-
similarity_struct.similar_images.sort_by(|x, y| y.similarity.cmp(&x.similarity));
334-
}
335-
new_vector.append(&mut vec_similarity_struct);
358+
new_vector.push((*vector_of_similar_images).to_owned());
336359
}
337360

338361
self.similar_vectors = new_vector;
339362

340-
#[allow(clippy::blocks_in_if_conditions)]
341-
Common::print_time(hash_map_modification, SystemTime::now(), "hash_map_modification(internal)".to_string());
342-
Common::print_time(start_time, SystemTime::now(), "check_for_similar_images".to_string());
363+
Common::print_time(hash_map_modification, SystemTime::now(), "sort_images".to_string());
343364
true
344365
}
345366

@@ -413,13 +434,13 @@ impl SaveResults for SimilarImages {
413434
if !self.similar_vectors.is_empty() {
414435
write!(file, "{} images which have similar friends\n\n", self.similar_vectors.len()).unwrap();
415436

416-
for struct_similar in self.similar_vectors.iter() {
417-
writeln!(file, "Image {:?} have {} similar images", struct_similar.base_image.path, struct_similar.similar_images.len()).unwrap();
418-
for similar_picture in struct_similar.similar_images.iter() {
419-
writeln!(file, "{:?} - Similarity Level: {}", similar_picture.path, get_string_from_similarity(&similar_picture.similarity)).unwrap();
420-
}
421-
writeln!(file).unwrap();
422-
}
437+
// for struct_similar in self.similar_vectors.iter() {
438+
// writeln!(file, "Image {:?} have {} similar images", struct_similar.base_image.path, struct_similar.similar_images.len()).unwrap();
439+
// for similar_picture in struct_similar.similar_images.iter() {
440+
// writeln!(file, "{:?} - Similarity Level: {}", similar_picture.path, get_string_from_similarity(&similar_picture.similarity)).unwrap();
441+
// }
442+
// writeln!(file).unwrap();
443+
// }
423444
} else {
424445
write!(file, "Not found any similar images.").unwrap();
425446
}
@@ -432,6 +453,19 @@ impl PrintResults for SimilarImages {
432453
fn print_results(&self) {
433454
if !self.similar_vectors.is_empty() {
434455
println!("Found {} images which have similar friends", self.similar_vectors.len());
456+
457+
for vec_file_entry in &self.similar_vectors {
458+
for file_entry in vec_file_entry {
459+
println!(
460+
"{} - {} - {} - {}",
461+
file_entry.path.display(),
462+
file_entry.dimensions,
463+
file_entry.size.file_size(options::BINARY).unwrap(),
464+
get_string_from_similarity(&file_entry.similarity)
465+
);
466+
}
467+
println!();
468+
}
435469
}
436470
}
437471
}

czkawka_gui/czkawka.glade

+2-2
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ Author: Rafał Mikrut
135135
</object>
136136
<object class="GtkWindow" id="window_main">
137137
<property name="can_focus">False</property>
138-
<property name="default_width">1000</property>
139-
<property name="default_height">700</property>
138+
<property name="default_width">1100</property>
139+
<property name="default_height">800</property>
140140
<child>
141141
<object class="GtkBox">
142142
<property name="visible">True</property>

czkawka_gui/src/connect_button_search.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use czkawka_core::duplicate::DuplicateFinder;
88
use czkawka_core::empty_files::EmptyFiles;
99
use czkawka_core::empty_folder::EmptyFolder;
1010
use czkawka_core::same_music::{MusicSimilarity, SameMusic};
11-
use czkawka_core::similar_files::SimilarImages;
11+
use czkawka_core::similar_images::SimilarImages;
1212
use czkawka_core::temporary::Temporary;
1313
use czkawka_core::zeroed::ZeroedFiles;
1414
use glib::Sender;
@@ -42,6 +42,14 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
4242
let check_button_music_album_artist: gtk::CheckButton = gui_data.check_button_music_album_artist.clone();
4343
let check_button_music_year: gtk::CheckButton = gui_data.check_button_music_year.clone();
4444
let shared_buttons = gui_data.shared_buttons.clone();
45+
let scrolled_window_main_empty_folder_finder = gui_data.scrolled_window_main_empty_folder_finder.clone();
46+
let scrolled_window_main_empty_files_finder = gui_data.scrolled_window_main_empty_files_finder.clone();
47+
let scrolled_window_big_files_finder = gui_data.scrolled_window_big_files_finder.clone();
48+
let scrolled_window_duplicate_finder = gui_data.scrolled_window_duplicate_finder.clone();
49+
let scrolled_window_main_temporary_files_finder = gui_data.scrolled_window_main_temporary_files_finder.clone();
50+
let scrolled_window_same_music_finder = gui_data.scrolled_window_same_music_finder.clone();
51+
let scrolled_window_similar_images_finder = gui_data.scrolled_window_similar_images_finder.clone();
52+
let scrolled_window_zeroed_files_finder = gui_data.scrolled_window_zeroed_files_finder.clone();
4553

4654
buttons_search_clone.connect_clicked(move |_| {
4755
let included_directories = get_string_from_list_store(&scrolled_window_included_directories);
@@ -59,6 +67,8 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
5967

6068
match notebook_main_children_names.get(notebook_main.get_current_page().unwrap() as usize).unwrap().as_str() {
6169
"notebook_main_duplicate_finder_label" => {
70+
get_list_store(&scrolled_window_duplicate_finder).clear();
71+
6272
let check_method;
6373
if radio_button_name.get_active() {
6474
check_method = duplicate::CheckingMethod::Name;
@@ -93,6 +103,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
93103
});
94104
}
95105
"scrolled_window_main_empty_folder_finder" => {
106+
get_list_store(&scrolled_window_main_empty_folder_finder).clear();
96107
let sender = sender.clone();
97108
let receiver_stop = stop_receiver.clone();
98109

@@ -107,6 +118,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
107118
});
108119
}
109120
"scrolled_window_main_empty_files_finder" => {
121+
get_list_store(&scrolled_window_main_empty_files_finder).clear();
110122
let sender = sender.clone();
111123
let receiver_stop = stop_receiver.clone();
112124

@@ -124,6 +136,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
124136
});
125137
}
126138
"scrolled_window_main_temporary_files_finder" => {
139+
get_list_store(&scrolled_window_main_temporary_files_finder).clear();
127140
let sender = sender.clone();
128141
let receiver_stop = stop_receiver.clone();
129142

@@ -140,6 +153,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
140153
});
141154
}
142155
"notebook_big_main_file_finder" => {
156+
get_list_store(&scrolled_window_big_files_finder).clear();
143157
let numbers_of_files_to_check = match entry_big_files_number.get_text().as_str().parse::<usize>() {
144158
Ok(t) => t,
145159
Err(_) => 50, // By default
@@ -163,6 +177,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
163177
}
164178

165179
"notebook_main_similar_images_finder_label" => {
180+
get_list_store(&scrolled_window_similar_images_finder).clear();
166181
let sender = sender.clone();
167182
let receiver_stop = stop_receiver.clone();
168183

@@ -185,6 +200,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
185200
});
186201
}
187202
"notebook_main_zeroed_files_finder" => {
203+
get_list_store(&scrolled_window_zeroed_files_finder).clear();
188204
let sender = sender.clone();
189205
let receiver_stop = stop_receiver.clone();
190206

@@ -202,6 +218,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
202218
});
203219
}
204220
"notebook_main_same_music_finder" => {
221+
get_list_store(&scrolled_window_same_music_finder).clear();
205222
let minimal_file_size = match entry_same_music_minimal_size.get_text().as_str().parse::<u64>() {
206223
Ok(t) => t,
207224
Err(_) => 1024, // By default

0 commit comments

Comments
 (0)