From 8d9161bef31c952bfa30ccf7ef6d5de6c8c39192 Mon Sep 17 00:00:00 2001 From: Drew Short Date: Tue, 5 Jan 2016 17:55:12 -0600 Subject: [PATCH] Added Compression To Cached Matrix Data Compression using flate2 has been added to the caching controller Made the hash code call for cached matrices before calculating them again if possible Changed the FFI tests to be a bit more dynamic. That said, I just noticed that Python hates unsigned 64-bit numbers. Need to find a way around this. I should be able to compare the native Rust code against the Python FFI and get the same results for a specific file. --- Cargo.toml | 1 + FFI-tests/ffi_test.py | 40 ++++++++++++++++++++++------------------ src/cache.rs | 29 ++++++++++++++++++++++++----- src/hash.rs | 35 ++++++++++++++++++++++------------- 4 files changed, 69 insertions(+), 36 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1232267..fde19d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,3 +20,4 @@ complex = "0.8.0" dft = "0.4.1" sha1 = "0.1.1" libc = "0.2.4" +flate2 = "0.2.11" diff --git a/FFI-tests/ffi_test.py b/FFI-tests/ffi_test.py index eafa4bb..a8ef21c 100755 --- a/FFI-tests/ffi_test.py +++ b/FFI-tests/ffi_test.py @@ -1,12 +1,23 @@ #!/usr/bin/env python3 +from ctypes import * from _ffi_test_py import ffi, lib -large_image_path = "test_images/sample_01_large.jpg".encode(encoding="utf-8") -medium_image_path = "test_images/sample_01_medium.jpg".encode(encoding="utf-8") -small_image_path = "test_images/sample_01_small.jpg".encode(encoding="utf-8") +large_image1_path = "test_images/sample_01_large.jpg".encode(encoding="utf-8") +medium_image1_path = "test_images/sample_01_medium.jpg".encode(encoding="utf-8") +small_image1_path = "test_images/sample_01_small.jpg".encode(encoding="utf-8") -print("starting test") +large_image2_path = "test_images/sample_02_large.jpg".encode(encoding="utf-8") +medium_image2_path = "test_images/sample_02_medium.jpg".encode(encoding="utf-8") +small_image2_path = 
"test_images/sample_02_small.jpg".encode(encoding="utf-8") + +large_image3_path = "test_images/sample_03_large.jpg".encode(encoding="utf-8") +medium_image3_path = "test_images/sample_03_medium.jpg".encode(encoding="utf-8") +small_image3_path = "test_images/sample_03_small.jpg".encode(encoding="utf-8") + +test_images=[large_image1_path, medium_image1_path, small_image1_path,large_image2_path, medium_image2_path, small_image2_path,large_image3_path, medium_image3_path, small_image3_path] + +print("starting ffi test") #initialize the library lib.init() @@ -15,20 +26,13 @@ lib.init() #print(large_image_path) #print('\\x'+'\\x'.join('{:02x}'.format(x) for x in large_image_path)) -print("Get hashes for {}", large_image_path) -print("AHash: {}",lib.ext_get_ahash(large_image_path)) -print("DHash: {}",lib.ext_get_dhash(large_image_path)) -print("PHash: {}",lib.ext_get_phash(large_image_path)) - -print("Get hashes for {}", medium_image_path) -print("AHash: {}",lib.ext_get_ahash(medium_image_path)) -print("DHash: {}",lib.ext_get_dhash(medium_image_path)) -print("PHash: {}",lib.ext_get_phash(medium_image_path)) - -print("Get hashes for {}", small_image_path) -print("AHash: {}",lib.ext_get_ahash(small_image_path)) -print("DHash: {}",lib.ext_get_dhash(small_image_path)) -print("PHash: {}",lib.ext_get_phash(small_image_path)) +for image in test_images: + print("Get hashes for {}", image) + print("AHash: {}",lib.ext_get_ahash(image) & 0xffffffffffffffff) + print("DHash: {}",lib.ext_get_dhash(image) & 0xffffffffffffffff) + print("PHash: {}",lib.ext_get_phash(image) & 0xffffffffffffffff) # Do cleanup #lib.teardown() + +print("ffi test finished") diff --git a/src/cache.rs b/src/cache.rs index 43c4133..ef66567 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -5,9 +5,14 @@ extern crate image; extern crate sha1; +extern crate flate2; use self::image::ImageBuffer; use self::sha1::Sha1; +use self::flate2::Compression; +use self::flate2::write::ZlibEncoder; +use 
self::flate2::read::ZlibDecoder; +use std::str::FromStr; use std::path::Path; use std::fs::{File, create_dir_all, remove_dir_all}; use std::io::{Read, Error, Write}; @@ -78,6 +83,7 @@ pub fn put_matrix_in_cache(path: &Path, // Save the file into the cache match File::create(&cached_path) { Ok(mut file) => { + let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default); for row in file_contents { let mut row_str = row.iter().fold(String::new(), |acc, &item| acc + &format!("{},", item)); @@ -85,8 +91,13 @@ pub fn put_matrix_in_cache(path: &Path, let desire_len = row_str.len() - 1; row_str.truncate(desire_len); row_str.push_str("\n"); - file.write(&row_str.into_bytes()); + compressor.write(&row_str.into_bytes()); } + let compressed_matrix = match compressor.finish() { + Ok(data) => data, + Err(e) => { println!("Unable to compress matrix data: {}", e); return }, + }; + file.write(&compressed_matrix); file.flush(); } Err(_) => {} @@ -144,11 +155,19 @@ pub fn get_matrix_from_cache(path: &Path, size: u32, extension: &str) -> Option< // Try to open, if it does, then we can read the image in match File::open(&cached_path) { Ok(mut file) => { - let mut matrix: Vec> = Vec::new(); - let mut matrix_data: Vec = Vec::new(); - file.read_to_end(&mut matrix_data); - let matrix_data_str = String::from_utf8(matrix_data); + let mut compressed_matrix_data: Vec = Vec::new(); + let mut decoder = ZlibDecoder::new(&file); + let mut matrix_data_str = String::new(); + match decoder.read_to_string(&mut matrix_data_str) { + Ok(_) => {}, + Err(e) => { println!("Unable to decompress matrix: {}",e); return None } + }; // convert the matrix + let matrix: Vec> = matrix_data_str.trim().split("\n") + .map(|line| line.split(",") + .map(|f| f64::from_str(f).unwrap()).collect()) + .collect(); + Some(matrix) } // Don't really care here, it just means an existing cached diff --git a/src/hash.rs b/src/hash.rs index a69d58f..0887700 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -294,24 +294,33 @@ 
impl<'a> PerceptualHash for PHash<'a> { let height = self.prepared_image.image.height() as usize; // Get 2d data to 2d FFT/DFT + // Either from the cache or calculate it + // Pretty fast already, so caching doesn't make a huge difference + // Atleast compared to opening and processing the images let mut data_matrix: Vec> = Vec::new(); - for x in 0..width { - data_matrix.push(Vec::new()); - for y in 0..height { - let pos_x = x as u32; - let pos_y = y as u32; - data_matrix[x] - .push(self.prepared_image.image.get_pixel(pos_x, pos_y).channels()[0] as f64); - } - } + match cache::get_matrix_from_cache(&Path::new(self.prepared_image.orig_path), width as u32, &"dft") { + Some(matrix) => data_matrix = matrix, + None => { + //Preparing the results + for x in 0..width { + data_matrix.push(Vec::new()); + for y in 0..height { + let pos_x = x as u32; + let pos_y = y as u32; + data_matrix[x] + .push(self.prepared_image.image.get_pixel(pos_x, pos_y).channels()[0] as f64); + } + } - // Perform the 2D DFT operation on our matrix - calculate_2d_dft(&mut data_matrix); - // Store this DFT in the cache - cache::put_matrix_in_cache(&Path::new(self.prepared_image.orig_path), + // Perform the 2D DFT operation on our matrix + calculate_2d_dft(&mut data_matrix); + // Store this DFT in the cache + cache::put_matrix_in_cache(&Path::new(self.prepared_image.orig_path), width as u32, &"dft", &data_matrix); + }, + } // Only need the top left quadrant let target_width = (width / 4) as usize;