From 8c7bc059f6eae3490a1afdae8afc1985b90ea04b Mon Sep 17 00:00:00 2001 From: Drew Short Date: Wed, 28 Aug 2019 10:45:56 -0500 Subject: [PATCH] Code cleanup * Renamed the FFI get_phashes to get_pihashes to reduce name collision with the phash implementation --- FFI-tests/ffi_test.py | 12 ++-- src/cache.rs | 47 +++++++++------- src/hash/ahash.rs | 6 +- src/hash/dhash.rs | 6 +- src/hash/mod.rs | 57 ++++++++++++++++--- src/hash/phash.rs | 95 ++++++++++++++++--------------- src/lib.rs | 128 ++++++++++++++++++++++++++++++++++++------ src/main.rs | 9 +-- 8 files changed, 256 insertions(+), 104 deletions(-) diff --git a/FFI-tests/ffi_test.py b/FFI-tests/ffi_test.py index 1f5bf48..1aecead 100755 --- a/FFI-tests/ffi_test.py +++ b/FFI-tests/ffi_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -from ctypes import * import os +from ctypes import * large_image1_path = "../test_images/sample_01_large.jpg".encode(encoding="utf-8") medium_image1_path = "../test_images/sample_01_medium.jpg".encode(encoding="utf-8") @@ -59,9 +59,9 @@ lib.ext_get_dhash.restype = c_ulonglong lib.ext_get_dhash.argtypes = [c_void_p, c_char_p] lib.ext_get_phash.restype = c_ulonglong lib.ext_get_phash.argtypes = [c_void_p, c_char_p] -lib.ext_get_phashes.restype = c_void_p -lib.ext_get_phashes.argtypes = [c_void_p, c_char_p] -lib.ext_free_phashes.argtypes = [c_void_p] +lib.ext_get_pihashes.restype = c_void_p +lib.ext_get_pihashes.argtypes = [c_void_p, c_char_p] +lib.ext_free_pihashes.argtypes = [c_void_p] # Takes a pointer and frees the struct at that memory location lib.ext_free.argtypes = [c_void_p] @@ -76,9 +76,9 @@ lib_struct = lib.ext_init("./.hash_cache".encode('utf-8')) for image in test_images: print("Requesting hashes for: %s"% image) - phashes = lib.ext_get_phashes(lib_struct, image) + phashes = lib.ext_get_pihashes(lib_struct, image) pihashes = PIHashes.from_address(phashes) - lib.ext_free_phashes(phashes) + lib.ext_free_pihashes(phashes) print("ahash: %i"% unsigned64(pihashes.ahash)) print("dhash: %i"% unsigned64(pihashes.dhash)) print("phash: %i"% unsigned64(pihashes.phash)) diff --git a/src/cache.rs b/src/cache.rs index 1860681..9b5552d 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -9,7 +9,7 @@ extern crate num; extern crate sha1; use std::default::Default; -use std::fs::{create_dir_all, remove_dir_all, File}; +use std::fs::{create_dir_all, File, remove_dir_all}; use std::io::{Error, ErrorKind, Read, Write}; use std::option::Option; use std::path::Path; @@ -18,9 +18,9 @@ use std::str::FromStr; use super::rustc_serialize::json; +use self::flate2::Compression; use self::flate2::read::ZlibDecoder; use self::flate2::write::ZlibEncoder; -use self::flate2::Compression; use self::image::DynamicImage; use self::sha1::Sha1; @@ -137,7 +137,7 @@ impl Cache { * Get the hash of the desired file and return it as a hex string */ pub fn get_file_hash(&self, path: &Path) -> Result { - let mut source = try!(File::open(&path)); + let mut source = File::open(&path)?; let mut buf: Vec = Vec::new(); source.read_to_end(&mut buf)?; let mut sha1 = Sha1::new(); @@ -293,7 +293,7 @@ impl Cache { let desire_len = row_str.len() - 1; row_str.truncate(desire_len); row_str.push_str("\n"); - try!(compressor.write(&row_str.into_bytes())); + compressor.write(&row_str.into_bytes())?; } let compressed_matrix = match compressor.finish() { Ok(data) => data, @@ -302,8 +302,8 @@ impl Cache { return Err(e); } }; - try!(file.write(&compressed_matrix)); - try!(file.flush()); + file.write(&compressed_matrix)?; + file.flush()?; } Err(e) => { return Err(e); @@ -379,20 +379,27 @@ impl Cache { } } -#[test] -fn test_get_file_hash() { - let target = "test_images/sample_01_large.jpg"; - let target_path = Path::new(target); - let cache: Cache = Default::default(); - let hash = cache.get_file_hash(&target_path); - match hash { - Ok(v) => { - println!("Hash: {}", v); - assert_eq!(v, String::from("4beb6f2d852b75a313863916a1803ebad13a3196")); - } - Err(e) => { - println!("Error: {:?}", e); - assert!(false); +#[cfg(test)] +mod tests { + use std::path::Path; + + use cache::Cache; + + #[test] + fn test_get_file_hash() { + let target = "test_images/sample_01_large.jpg"; + let target_path = Path::new(target); + let cache: Cache = Default::default(); + let hash = cache.get_file_hash(&target_path); + match hash { + Ok(v) => { + println!("Hash: {}", v); + assert_eq!(v, String::from("4beb6f2d852b75a313863916a1803ebad13a3196")); + } + Err(e) => { + println!("Error: {:?}", e); + assert!(false); + } } } } diff --git a/src/hash/ahash.rs b/src/hash/ahash.rs index e237143..1301d78 100644 --- a/src/hash/ahash.rs +++ b/src/hash/ahash.rs @@ -6,10 +6,9 @@ use std::path::Path; use cache::Cache; -use super::prepare_image; use super::{HashType, PerceptualHash, Precision, PreparedImage}; - use super::image::GenericImageView; +use super::prepare_image; pub struct AHash { prepared_image: Box, @@ -63,3 +62,6 @@ impl PerceptualHash for AHash { } } } + +#[cfg(test)] +mod tests {} diff --git a/src/hash/dhash.rs b/src/hash/dhash.rs index 6ddc86e..a4e0735 100644 --- a/src/hash/dhash.rs +++ b/src/hash/dhash.rs @@ -6,10 +6,9 @@ use std::path::Path; use cache::Cache; -use super::prepare_image; use super::{HashType, PerceptualHash, Precision, PreparedImage}; - use super::image::GenericImageView; +use super::prepare_image; pub struct DHash { prepared_image: Box, @@ -69,3 +68,6 @@ impl PerceptualHash for DHash { } } } + +#[cfg(test)] +mod tests {} diff --git a/src/hash/mod.rs b/src/hash/mod.rs index fa06c80..ab30724 100644 --- a/src/hash/mod.rs +++ b/src/hash/mod.rs @@ -8,8 +8,11 @@ extern crate image; use std::f64; use std::fmt; +use std::fmt::{Error, Formatter}; use std::path::Path; +use serde::export::fmt::Debug; + use cache::Cache; use self::image::FilterType; @@ -52,22 +55,37 @@ pub struct PreparedImage { /** * Wraps the various perceptual hashes */ +#[derive(Debug)] pub struct PerceptualHashes { pub orig_path: String, pub ahash: u64, pub dhash: u64, - pub phash: u64, + pub phash: u64 +} + +impl PartialEq for PerceptualHashes { + fn eq(&self, other: &Self) -> bool { + return self.ahash == other.ahash + && self.dhash == other.dhash + && self.phash == other.phash; + } + + fn ne(&self, other: &Self) -> bool { + return self.ahash != other.ahash + || self.dhash != other.dhash + || self.phash != other.phash; + } } impl PerceptualHashes { pub fn similar(&self, other: &PerceptualHashes) -> bool { if self.orig_path != other.orig_path && calculate_hamming_distance(self.ahash, other.ahash) - <= HAMMING_DISTANCE_SIMILARITY_LIMIT + <= HAMMING_DISTANCE_SIMILARITY_LIMIT && calculate_hamming_distance(self.dhash, other.dhash) - <= HAMMING_DISTANCE_SIMILARITY_LIMIT + <= HAMMING_DISTANCE_SIMILARITY_LIMIT && calculate_hamming_distance(self.phash, other.phash) - <= HAMMING_DISTANCE_SIMILARITY_LIMIT + <= HAMMING_DISTANCE_SIMILARITY_LIMIT { true } else { @@ -130,7 +148,7 @@ pub trait PerceptualHash { // Functions // /** - * Resonsible for parsing a path, converting an image and package it to be + * Responsible for parsing a path, converting an image and package it to be * hashed. * * # Arguments @@ -234,9 +252,9 @@ pub fn get_perceptual_hashes( let phash = phash::PHash::new(&path, &precision, &cache).get_hash(&cache); PerceptualHashes { orig_path: String::from(&*image_path), - ahash: ahash, - dhash: dhash, - phash: phash, + ahash, + dhash, + phash, } } @@ -250,3 +268,26 @@ pub fn calculate_hamming_distance(hash1: u64, hash2: u64) -> u64 { // the number of 1's in the difference to determine the hamming distance (hash1 ^ hash2).count_ones() as u64 } + +#[cfg(test)] +mod tests { + use hash::calculate_hamming_distance; + + #[test] + fn test_no_hamming_distance() { + let hamming_distance = calculate_hamming_distance(0, 0); + assert_eq!(hamming_distance, 0); + } + + #[test] + fn test_one_hamming_distance() { + let hamming_distance = calculate_hamming_distance(0, 1); + assert_eq!(hamming_distance, 1); + } + + #[test] + fn test_two_hamming_distance() { + let hamming_distance = calculate_hamming_distance(0, 3); + assert_eq!(hamming_distance, 2); + } +} diff --git a/src/hash/phash.rs b/src/hash/phash.rs index d2f2069..f9940d3 100644 --- a/src/hash/phash.rs +++ b/src/hash/phash.rs @@ -6,11 +6,11 @@ use std::path::Path; use cache::Cache; +use super::{HashType, PerceptualHash, Precision, PreparedImage}; use super::dft; use super::dft::Transform; use super::image::{DynamicImage, GenericImageView, Pixel}; use super::prepare_image; -use super::{HashType, PerceptualHash, Precision, PreparedImage}; pub struct PHash { prepared_image: Box, @@ -41,7 +41,7 @@ impl PerceptualHash for PHash { // Get 2d data to 2d FFT/DFT // Either from the cache or calculate it // Pretty fast already, so caching doesn't make a huge difference - // Atleast compared to opening and processing the images + // At least compared to opening and processing the images let data_matrix: Vec> = match *cache { Some(ref c) => { match c.get_matrix_from_cache( @@ -117,11 +117,11 @@ fn create_data_matrix(width: u32, height: u32, image: &DynamicImage) -> Vec f64 { } } -#[test] -fn test_2d_dft() { - let mut test_matrix: Vec> = Vec::new(); - test_matrix.push(vec![1f64, 1f64, 1f64, 3f64]); - test_matrix.push(vec![1f64, 2f64, 2f64, 1f64]); - test_matrix.push(vec![1f64, 2f64, 2f64, 1f64]); - test_matrix.push(vec![3f64, 1f64, 1f64, 1f64]); - - println!("{:?}", test_matrix[0]); - println!("{:?}", test_matrix[1]); - println!("{:?}", test_matrix[2]); - println!("{:?}", test_matrix[3]); - - println!("Performing 2d DFT"); - calculate_2d_dft(&mut test_matrix); - - println!("{:?}", test_matrix[0]); - println!("{:?}", test_matrix[1]); - println!("{:?}", test_matrix[2]); - println!("{:?}", test_matrix[3]); - - assert!(test_matrix[0][0] == 24_f64); - assert!(test_matrix[0][1] == 0_f64); - assert!(test_matrix[0][2] == 0_f64); - assert!(test_matrix[0][3] == 0_f64); - - assert!(test_matrix[1][0] == 0_f64); - assert!(test_matrix[1][1] == 0_f64); - assert!(test_matrix[1][2] == -2_f64); - assert!(test_matrix[1][3] == 2_f64); - - assert!(test_matrix[2][0] == 0_f64); - assert!(test_matrix[2][1] == -2_f64); - assert!(test_matrix[2][2] == -4_f64); - assert!(test_matrix[2][3] == -2_f64); - - assert!(test_matrix[3][0] == 0_f64); - assert!(test_matrix[3][1] == 2_f64); - assert!(test_matrix[3][2] == -2_f64); - assert!(test_matrix[3][3] == 0_f64); +#[cfg(test)] +mod tests { + use hash::phash::calculate_2d_dft; + + #[test] + fn test_2d_dft() { + let mut test_matrix: Vec> = Vec::new(); + test_matrix.push(vec![1f64, 1f64, 1f64, 3f64]); + test_matrix.push(vec![1f64, 2f64, 2f64, 1f64]); + test_matrix.push(vec![1f64, 2f64, 2f64, 1f64]); + test_matrix.push(vec![3f64, 1f64, 1f64, 1f64]); + + println!("2d matrix before DFT"); + println!("{:?}", test_matrix[0]); + println!("{:?}", test_matrix[1]); + println!("{:?}", test_matrix[2]); + println!("{:?}", test_matrix[3]); + + println!("Performing 2d DFT"); + calculate_2d_dft(&mut test_matrix); + + println!("2d matrix after DFT"); + println!("{:?}", test_matrix[0]); + println!("{:?}", test_matrix[1]); + println!("{:?}", test_matrix[2]); + println!("{:?}", test_matrix[3]); + + assert_eq!(test_matrix[0][0], 24_f64); + assert_eq!(test_matrix[0][1], 0_f64); + assert_eq!(test_matrix[0][2], 0_f64); + assert_eq!(test_matrix[0][3], 0_f64); + + assert_eq!(test_matrix[1][0], 0_f64); + assert_eq!(test_matrix[1][1], 0_f64); + assert_eq!(test_matrix[1][2], -2_f64); + assert_eq!(test_matrix[1][3], 2_f64); + + assert_eq!(test_matrix[2][0], 0_f64); + assert_eq!(test_matrix[2][1], -2_f64); + assert_eq!(test_matrix[2][2], -4_f64); + assert_eq!(test_matrix[2][3], -2_f64); + + assert_eq!(test_matrix[3][0], 0_f64); + assert_eq!(test_matrix[3][1], 2_f64); + assert_eq!(test_matrix[3][2], -2_f64); + assert_eq!(test_matrix[3][3], 0_f64); + } } diff --git a/src/lib.rs b/src/lib.rs index c252a68..2ad327d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ extern crate libc; extern crate rustc_serialize; +extern crate serde; #[cfg(feature = "bench")] extern crate test; @@ -57,7 +58,7 @@ impl PIHash { hash::get_perceptual_hash(&path, &precision, &hash_type, &self.cache) } - pub fn get_phashes(&self, path: &Path) -> hash::PerceptualHashes { + pub fn get_pihashes(&self, path: &Path) -> hash::PerceptualHashes { hash::get_perceptual_hashes(&path, &hash::Precision::Medium, &self.cache) } @@ -159,24 +160,24 @@ pub struct PIHashes { } #[no_mangle] -pub extern "C" fn ext_get_phashes(lib: &PIHash, path_char: *const libc::c_char) -> *mut PIHashes { +pub extern "C" fn ext_get_pihashes(lib: &PIHash, path_char: *const libc::c_char) -> *mut PIHashes { unsafe { let path_str = CStr::from_ptr(path_char); let image_path = get_str_from_cstr(path_str); let path = Path::new(&image_path); - let phashes = lib.get_phashes(path); + let pihashes = lib.get_pihashes(path); Box::into_raw(Box::new(PIHashes { - ahash: phashes.ahash, - dhash: phashes.dhash, - phash: phashes.phash, + ahash: pihashes.ahash, + dhash: pihashes.dhash, + phash: pihashes.phash, })) } } #[no_mangle] -pub extern "C" fn ext_free_phashes(raw_phashes: *const libc::c_void) { +pub extern "C" fn ext_free_pihashes(raw_pihashes: *const libc::c_void) { unsafe { - drop(Box::from_raw(raw_phashes as *mut PIHashes)); + drop(Box::from_raw(raw_pihashes as *mut PIHashes)); } } @@ -213,10 +214,11 @@ mod tests { use cache; use hash; + use hash::{PerceptualHash, PerceptualHashes}; + use super::PIHash; #[cfg(feature = "bench")] use super::test::Bencher; - use super::PIHash; thread_local!(static LIB: PIHash = PIHash::new(Some(cache::DEFAULT_CACHE_DIR))); thread_local!(static NO_CACHE_LIB: PIHash = PIHash::new(None)); @@ -250,7 +252,7 @@ mod tests { * Updated test function. Assumes 3 images to a set and no hamming distances. * We don't need to confirm that the hamming distance calculation works in these tests. */ - fn test_imageset_hash( + fn test_image_set_hash( hash_type: hash::HashType, hash_precision: hash::Precision, max_hamming_distance: u64, @@ -302,7 +304,7 @@ mod tests { image_hashes: [u64; 3], ) { LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash_type, hash_precision, max_hamming_distance, @@ -312,7 +314,7 @@ mod tests { ); }); NO_CACHE_LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash_type, hash_precision, max_hamming_distance, @@ -323,6 +325,65 @@ mod tests { }); } + /** + * Updated test function. Assumes 3 images to a set and no hamming distances. + * We don't need to confirm that the hamming distance calculation works in these tests. + */ + fn test_images_hashes( + image_hashes: &[PerceptualHashes], + lib: &PIHash, + ) { + let mut hashes = vec![]; + for index in 0..image_hashes.len() { +// println!("{}, {:?}", index, image_paths[index]); + let image_path = Path::new(&image_hashes[index].orig_path); + let calculated_hash = lib.get_pihashes(&image_path); + println!( + "Image hashes expected: [{:?}] actual: [{:?}]", + image_hashes[index], + calculated_hash + ); + hashes.push(calculated_hash); + } + for index in 0..image_hashes.len() { + assert_eq!(hashes[index], image_hashes[index]); + } +// +// for index in 0..hashes.len() { +// for index2 in 0..hashes.len() { +// if index == index2 { +// continue; +// } else { +// let distance = hash::calculate_hamming_distance(hashes[index], hashes[index2]); +// println!("Hashes [{}] and [{}] have a hamming distance of [{}] of a max allowed distance of [{}]", +// hashes[index], +// hashes[index2], +// distance, +// max_hamming_distance); +// assert!(distance <= max_hamming_distance); +// } +// } +// } + } + + /** + * Test images with and without caching + */ + fn test_images(image_hashes: &[PerceptualHashes]) { + LIB.with(|lib| { + test_images_hashes( + &image_hashes, + lib, + ); + }); + NO_CACHE_LIB.with(|lib| { + test_images_hashes( + &image_hashes, + lib, + ); + }); + } + #[test] fn test_confirm_ahash_results_sample_01() { let sample_01_images: [&Path; 3] = [ @@ -392,7 +453,7 @@ mod tests { 18446460933225054208, ]; LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash::HashType::AHash, hash::Precision::Medium, 0u64, @@ -416,7 +477,7 @@ mod tests { 3404580580803739582, ]; LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash::HashType::DHash, hash::Precision::Medium, 0u64, @@ -440,7 +501,7 @@ mod tests { 14726771606135242753, ]; LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash::HashType::DHash, hash::Precision::Medium, 0u64, @@ -461,7 +522,7 @@ mod tests { let sample_03_hashes: [u64; 3] = [144115181601817086, 144115181601817086, 144115181601817086]; LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash::HashType::DHash, hash::Precision::Medium, 0u64, @@ -485,7 +546,7 @@ mod tests { 18374262188442386433, ]; LIB.with(|lib| { - test_imageset_hash( + test_image_set_hash( hash::HashType::DHash, hash::Precision::Medium, 0u64, @@ -576,6 +637,37 @@ mod tests { ); } + #[test] + fn test_confirm_pihash_results() { + let sample_hashes: [PerceptualHashes; 4] = [ + PerceptualHashes { + orig_path: "./test_images/sample_01_large.jpg".to_string(), + ahash: 857051991849750, + dhash: 3404580580803739582, + phash: 72357778504597504, + }, + PerceptualHashes { + orig_path: "./test_images/sample_02_large.jpg".to_string(), + ahash: 18446744073441116160, + dhash: 14726771606135242753, + phash: 5332332327550844928, + }, + PerceptualHashes { + orig_path: "./test_images/sample_03_large.jpg".to_string(), + ahash: 135670932300497406, + dhash: 144115181601817086, + phash: 6917529027641081856, + }, + PerceptualHashes { + orig_path: "./test_images/sample_04_large.jpg".to_string(), + ahash: 18446460933225054208, + dhash: 18374262188442386433, + phash: 10997931646002397184, + } + ]; + test_images(&sample_hashes); + } + #[cfg(feature = "bench")] #[bench] fn bench_with_cache(bench: &mut Bencher) -> () { @@ -583,7 +675,7 @@ mod tests { let lib = PIHash::new(Some(cache::DEFAULT_CACHE_DIR)); // Setup the caches to make sure we're good to properly bench - // All phashes so that the matricies are pulled from cache as well + // All pihashes so that the matrices are pulled from cache as well lib.get_perceptual_hash( &Path::new("./test_images/sample_01_large.jpg"), &hash::Precision::Medium, diff --git a/src/main.rs b/src/main.rs index 9870969..5c2d9de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,10 @@ extern crate rustc_serialize; #[macro_use] extern crate serde_derive; -use docopt::Docopt; use std::path::Path; +use docopt::Docopt; + // Getting the version information from cargo during compile time const VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -139,8 +140,8 @@ fn get_requested_perceptual_hashes( pihash::hash::PerceptualHashes { orig_path: String::from(image_path.to_str().unwrap()), - ahash: ahash, - dhash: dhash, - phash: phash, + ahash, + dhash, + phash, } }