Browse Source

Here come the phashes

develop
Drew Short 9 years ago
parent
commit
9f6423932c
  1. 2
      Cargo.toml
  2. 162
      src/hash.rs
  3. 87
      src/lib.rs

2
Cargo.toml

@ -7,3 +7,5 @@ authors = ["Drew Short <warrick@sothr.com>"]
docopt = "*" docopt = "*"
rustc-serialize = "*" rustc-serialize = "*"
image = "*" image = "*"
complex = "*"
dft = "*"

162
src/hash.rs

@ -5,6 +5,8 @@
// Pull in the image processing crate // Pull in the image processing crate
extern crate image; extern crate image;
extern crate dft;
extern crate complex;
use std::path::Path; use std::path::Path;
use self::image::{ use self::image::{
@ -12,6 +14,8 @@ use self::image::{
Pixel, Pixel,
FilterType FilterType
}; };
use self::dft::real;
use self::complex::*;
/** /**
* Prepared image that can be used to generate hashes * Prepared image that can be used to generate hashes
@ -56,10 +60,12 @@ pub fn prepare_image(path: &Path, size: u32) -> PreparedImage {
/** /**
* Get all perceptual hashes for an image * Get all perceptual hashes for an image
*/ */
pub fn get_perceptual_hashes(path: &Path, size: u32, phash_size: u32) -> PerceptualHashes {
pub fn get_perceptual_hashes(path: &Path, size: u32) -> PerceptualHashes {
let image_path = path.to_str().unwrap(); let image_path = path.to_str().unwrap();
let prepared_image = prepare_image(path, size); let prepared_image = prepare_image(path, size);
let phash_prepared_image = prepare_image(path, phash_size);
// phash uses a DFT, so it needs an image 4 times larger to work with for
// the same precision of hash. That said, this hash is much more accurate.
let phash_prepared_image = prepare_image(path, size*4);
let ahash = get_ahash(&prepared_image); let ahash = get_ahash(&prepared_image);
let dhash = get_dhash(&prepared_image); let dhash = get_dhash(&prepared_image);
let phash = get_phash(&phash_prepared_image); let phash = get_phash(&phash_prepared_image);
@ -125,7 +131,7 @@ pub fn get_ahash(prepared_image: &PreparedImage) -> u64 {
} }
//println!("Hash for {} is {}", prepared_image.orig_path, hash); //println!("Hash for {} is {}", prepared_image.orig_path, hash);
return hash;
hash
} }
/** /**
@ -169,7 +175,62 @@ pub fn get_dhash(prepared_image: &PreparedImage) -> u64 {
hash |= 0; hash |= 0;
} }
return hash;
hash
}
/*
* Use a 1D DFT to cacluate the 2D DFT.
*
* This is achieved by calculating the DFT for each row, then calculating the
* DFT for each column of DFT row data. This means that a 32x32 image with have
* 1024 1D DFT operations performed on it. (Slightly caclulation intensive)
*
* This operation is in place on the data in the provided vector
*
* Inspired by:
* http://www.inf.ufsc.br/~visao/khoros/html-dip/c5/s2/front-page.html
*
* Checked with:
* http://calculator.vhex.net/post/calculator-result/2d-discrete-fourier-transform
*/
fn calculate_2d_dft(data_matrix: &mut Vec<Vec<f64>>){
//println!("{:?}", data_matrix);
let width = data_matrix.len();
let height = data_matrix[0].len();
let mut complex_data_matrix = Vec::with_capacity(width);
// Perform DCT on the columns of data
for x in 0..width {
let mut column: Vec<f64> = Vec::with_capacity(height);
for y in 0..height {
column.push(data_matrix[x][y]);
}
// Perform the DCT on this column
//println!("column[{}] before: {:?}", x, column);
real::forward(&mut column);
let complex_column = real::unpack(&column);
//println!("column[{}] after: {:?}", x, complex_column);
complex_data_matrix.push(complex_column);
}
// Perform DCT on the rows of data
for y in 0..height {
let mut row = Vec::with_capacity(width);
for x in 0..width {
row.push(complex_data_matrix[x][y]);
}
// Perform DCT on the row
//println!("row[{}] before: {:?}", y, row);
dft::complex::forward(&mut row);
//println!("row[{}] after: {:?}", y, row);
// Put the row values back
for x in 0..width {
data_matrix[x][y] = row[x].re();
}
}
} }
/** /**
@ -184,5 +245,96 @@ pub fn get_dhash(prepared_image: &PreparedImage) -> u64 {
* Returns a u64 representing the value of the hash * Returns a u64 representing the value of the hash
*/ */
pub fn get_phash(prepared_image: &PreparedImage) -> u64 { pub fn get_phash(prepared_image: &PreparedImage) -> u64 {
0u64
// Get the image data into a vector to perform the DFT on.
let width = prepared_image.image.width() as usize;
let height = prepared_image.image.height() as usize;
// Get 2d data to 2d FFT/DFT
let mut data_matrix: Vec<Vec<f64>> = Vec::new();
for x in (0..width) {
data_matrix.push(Vec::new());
for y in (0..height) {
let pos_x = x as u32;
let pos_y = y as u32;
data_matrix[x].push(prepared_image.image.get_pixel(pos_x,pos_y).channels()[0] as f64);
}
}
// Perform the 2D DFT operation on our matrix
calculate_2d_dft(&mut data_matrix);
// Only need the top left quadrant
let target_width = (width / 4) as usize;
let target_height = (height / 4) as usize;
let dft_width = (width / 4) as f64;
let dft_height = (height / 4) as f64;
//Calculate the mean
let mut total = 0f64;
for x in (0..target_width) {
for y in (0..target_height) {
total += data_matrix[x][y];
}
}
let mean = total / (dft_width * dft_height);
// Calculating a hash based on the mean
let mut hash = 0u64;
for x in (0..target_width) {
// println!("Mean: {} Values: {:?}",mean,data_matrix[x]);
for y in (0..target_height) {
if data_matrix[x][y] >= mean {
hash |= 1;
//println!("Pixel {} is >= {} therefore {:b}", pixel_sum, mean, hash);
} else {
hash |= 0;
//println!("Pixel {} is < {} therefore {:b}", pixel_sum, mean, hash);
}
hash <<= 1;
}
}
//println!("Hash for {} is {}", prepared_image.orig_path, hash);
hash
}
// Checks calculate_2d_dft against a known 4x4 example.
//
// The transform is done in floating point, so results are compared with a
// small tolerance instead of bit-exact equality; exact comparison against
// values such as -6.66e-16 depends on platform and library rounding.
#[test]
fn test_2d_dft() {
    let mut test_matrix: Vec<Vec<f64>> = vec![
        vec![1f64, 1f64, 1f64, 3f64],
        vec![1f64, 2f64, 2f64, 1f64],
        vec![1f64, 2f64, 2f64, 1f64],
        vec![3f64, 1f64, 1f64, 1f64],
    ];

    // Printed for debugging; only visible when the test fails or is run
    // with --nocapture.
    for line in &test_matrix {
        println!("{:?}", line);
    }
    println!("Performing 2d DFT");
    calculate_2d_dft(&mut test_matrix);
    for line in &test_matrix {
        println!("{:?}", line);
    }

    // Real parts of the expected 2D DFT, indexed the same way as the input.
    let expected: [[f64; 4]; 4] = [
        [24f64, 0f64, 0f64, 0f64],
        [0f64, 0f64, -2f64, 2f64],
        [0f64, -2f64, -4f64, -2f64],
        [0f64, 2f64, -2f64, 0f64],
    ];
    let tolerance = 1e-9f64;

    for x in 0..4 {
        for y in 0..4 {
            let actual = test_matrix[x][y];
            let wanted = expected[x][y];
            assert!(
                (actual - wanted).abs() < tolerance,
                "test_matrix[{}][{}] = {}, expected {} (+/- {})",
                x, y, actual, wanted, tolerance
            );
        }
    }
}

87
src/lib.rs

@ -27,7 +27,7 @@ pub fn hello(mut result: String) -> String {
} }
pub fn get_phashes(path: &Path) -> hash::PerceptualHashes { pub fn get_phashes(path: &Path) -> hash::PerceptualHashes {
hash::get_perceptual_hashes(path, 8, 32)
hash::get_perceptual_hashes(path, 8)
} }
pub fn get_ahash(path: &Path) -> u64 { pub fn get_ahash(path: &Path) -> u64 {
@ -84,8 +84,9 @@ mod tests {
// Simple function for the unit tests to succinctly test a set of images // Simple function for the unit tests to succinctly test a set of images
// that are organized in the fashion of large->medium->small // that are organized in the fashion of large->medium->small
fn test_imageset_hash<F: Fn(hash::PreparedImage) -> u64>(
fn test_imageset_hash<F: Fn(&hash::PreparedImage) -> u64>(
hash_func: F, hash_func: F,
resize_target: u32,
large_path: &str, large_path: &str,
medium_path: &str, medium_path: &str,
small_path: &str, small_path: &str,
@ -96,13 +97,13 @@ mod tests {
expected_large_small_hamming: u64, expected_large_small_hamming: u64,
expected_medium_small_hamming: u64) { expected_medium_small_hamming: u64) {
let large_prepared_image = hash::prepare_image(path::Path::new(large_path), 8u32);
let medium_prepared_image = hash::prepare_image(path::Path::new(medium_path), 8u32);
let small_prepared_image = hash::prepare_image(path::Path::new(small_path), 8u32);
let large_prepared_image = hash::prepare_image(path::Path::new(large_path), resize_target);
let medium_prepared_image = hash::prepare_image(path::Path::new(medium_path), resize_target);
let small_prepared_image = hash::prepare_image(path::Path::new(small_path), resize_target);
let actual_large_hash = hash_func(large_prepared_image);
let actual_medium_hash = hash_func(medium_prepared_image);
let actual_small_hash = hash_func(small_prepared_image);
let actual_large_hash = hash_func(&large_prepared_image);
let actual_medium_hash = hash_func(&medium_prepared_image);
let actual_small_hash = hash_func(&small_prepared_image);
// println for the purpose of debugging // println for the purpose of debugging
println!("{}: expected: {} actual: {}", large_path, expected_large_hash, actual_large_hash); println!("{}: expected: {} actual: {}", large_path, expected_large_hash, actual_large_hash);
@ -133,6 +134,7 @@ mod tests {
// Sample_01 tests // Sample_01 tests
test_imageset_hash( test_imageset_hash(
hash::get_ahash, hash::get_ahash,
8u32,
"./test_images/sample_01_large.jpg", "./test_images/sample_01_large.jpg",
"./test_images/sample_01_medium.jpg", "./test_images/sample_01_medium.jpg",
"./test_images/sample_01_small.jpg", "./test_images/sample_01_small.jpg",
@ -147,6 +149,7 @@ mod tests {
// Sample_02 tests // Sample_02 tests
test_imageset_hash( test_imageset_hash(
hash::get_ahash, hash::get_ahash,
8u32,
"./test_images/sample_02_large.jpg", "./test_images/sample_02_large.jpg",
"./test_images/sample_02_medium.jpg", "./test_images/sample_02_medium.jpg",
"./test_images/sample_02_small.jpg", "./test_images/sample_02_small.jpg",
@ -160,6 +163,7 @@ mod tests {
// Sample_03 tests // Sample_03 tests
test_imageset_hash( test_imageset_hash(
hash::get_ahash, hash::get_ahash,
8u32,
"./test_images/sample_03_large.jpg", "./test_images/sample_03_large.jpg",
"./test_images/sample_03_medium.jpg", "./test_images/sample_03_medium.jpg",
"./test_images/sample_03_small.jpg", "./test_images/sample_03_small.jpg",
@ -174,6 +178,7 @@ mod tests {
// Sample_04 tests // Sample_04 tests
test_imageset_hash( test_imageset_hash(
hash::get_ahash, hash::get_ahash,
8u32,
"./test_images/sample_04_large.jpg", "./test_images/sample_04_large.jpg",
"./test_images/sample_04_medium.jpg", "./test_images/sample_04_medium.jpg",
"./test_images/sample_04_small.jpg", "./test_images/sample_04_small.jpg",
@ -191,6 +196,7 @@ mod tests {
// Sample_01 tests // Sample_01 tests
test_imageset_hash( test_imageset_hash(
hash::get_dhash, hash::get_dhash,
8u32,
"./test_images/sample_01_large.jpg", "./test_images/sample_01_large.jpg",
"./test_images/sample_01_medium.jpg", "./test_images/sample_01_medium.jpg",
"./test_images/sample_01_small.jpg", "./test_images/sample_01_small.jpg",
@ -205,6 +211,7 @@ mod tests {
// Sample_02 tests // Sample_02 tests
test_imageset_hash( test_imageset_hash(
hash::get_dhash, hash::get_dhash,
8u32,
"./test_images/sample_02_large.jpg", "./test_images/sample_02_large.jpg",
"./test_images/sample_02_medium.jpg", "./test_images/sample_02_medium.jpg",
"./test_images/sample_02_small.jpg", "./test_images/sample_02_small.jpg",
@ -218,6 +225,7 @@ mod tests {
// Sample_03 tests // Sample_03 tests
test_imageset_hash( test_imageset_hash(
hash::get_dhash, hash::get_dhash,
8u32,
"./test_images/sample_03_large.jpg", "./test_images/sample_03_large.jpg",
"./test_images/sample_03_medium.jpg", "./test_images/sample_03_medium.jpg",
"./test_images/sample_03_small.jpg", "./test_images/sample_03_small.jpg",
@ -232,6 +240,7 @@ mod tests {
// Sample_04 tests // Sample_04 tests
test_imageset_hash( test_imageset_hash(
hash::get_dhash, hash::get_dhash,
8u32,
"./test_images/sample_04_large.jpg", "./test_images/sample_04_large.jpg",
"./test_images/sample_04_medium.jpg", "./test_images/sample_04_medium.jpg",
"./test_images/sample_04_small.jpg", "./test_images/sample_04_small.jpg",
@ -243,4 +252,66 @@ mod tests {
0u64 0u64
); );
} }
// Runs get_phash over the four sample image sets, each prepared at a
// 32 pixel resize target, and hands the expected values to
// test_imageset_hash.
#[test]
fn confirm_phash_results() {
    // Each entry is, in the order test_imageset_hash takes them:
    //   large path, medium path, small path,
    //   three expected hashes, three expected hamming distances.
    let cases: [(&str, &str, &str, u64, u64, u64, u64, u64, u64); 4] = [
        // Sample_01 tests
        (
            "./test_images/sample_01_large.jpg",
            "./test_images/sample_01_medium.jpg",
            "./test_images/sample_01_small.jpg",
            72357778504597504,
            72357778504597504,
            72357778504597504,
            0u64,
            0u64,
            0u64,
        ),
        // Sample_02 tests
        (
            "./test_images/sample_02_large.jpg",
            "./test_images/sample_02_medium.jpg",
            "./test_images/sample_02_small.jpg",
            5332332327550844928,
            5332332327550844928,
            5332332327550844928,
            0u64,
            0u64,
            0u64,
        ),
        // Sample_03 tests
        (
            "./test_images/sample_03_large.jpg",
            "./test_images/sample_03_medium.jpg",
            "./test_images/sample_03_small.jpg",
            6917529027641081856,
            6917529027641081856,
            6917529027641081856,
            0u64,
            0u64,
            0u64,
        ),
        // Sample_04 tests
        (
            "./test_images/sample_04_large.jpg",
            "./test_images/sample_04_medium.jpg",
            "./test_images/sample_04_small.jpg",
            10997931646002397184,
            10997931646002397184,
            11142046834078253056,
            0u64,
            1u64,
            1u64,
        ),
    ];

    for &(large_path, medium_path, small_path,
          first_hash, second_hash, third_hash,
          first_distance, second_distance, third_distance) in cases.iter() {
        test_imageset_hash(
            hash::get_phash,
            32u32,
            large_path,
            medium_path,
            small_path,
            first_hash,
            second_hash,
            third_hash,
            first_distance,
            second_distance,
            third_distance
        );
    }
}
} }
Loading…
Cancel
Save