
Added Compression To Cached Matrix Data

Compression using flate2 has been added to the caching controller.
Made the hash code check for cached matrices before calculating them again
    when possible.
Changed the FFI tests to be a bit more dynamic. That said, I just noticed that
    Python hates unsigned 64-bit numbers. Need to find a way around this. I
    should be able to compare the native Rust code against the Python FFI and
    get the same results for a specific file.
Branch: develop
Author: Drew Short, 9 years ago
Commit: 8d9161bef3
  1. Cargo.toml (1 changed line)
  2. FFI-tests/ffi_test.py (40 changed lines)
  3. src/cache.rs (29 changed lines)
  4. src/hash.rs (9 changed lines)

Cargo.toml (1 changed line)

@@ -20,3 +20,4 @@ complex = "0.8.0"
 dft = "0.4.1"
 sha1 = "0.1.1"
 libc = "0.2.4"
+flate2 = "0.2.11"

FFI-tests/ffi_test.py (40 changed lines)

@@ -1,12 +1,23 @@
 #!/usr/bin/env python3
+from ctypes import *
 from _ffi_test_py import ffi, lib
-large_image_path = "test_images/sample_01_large.jpg".encode(encoding="utf-8")
-medium_image_path = "test_images/sample_01_medium.jpg".encode(encoding="utf-8")
-small_image_path = "test_images/sample_01_small.jpg".encode(encoding="utf-8")
+large_image1_path = "test_images/sample_01_large.jpg".encode(encoding="utf-8")
+medium_image1_path = "test_images/sample_01_medium.jpg".encode(encoding="utf-8")
+small_image1_path = "test_images/sample_01_small.jpg".encode(encoding="utf-8")
-print("starting test")
+large_image2_path = "test_images/sample_02_large.jpg".encode(encoding="utf-8")
+medium_image2_path = "test_images/sample_02_medium.jpg".encode(encoding="utf-8")
+small_image2_path = "test_images/sample_02_small.jpg".encode(encoding="utf-8")
+large_image3_path = "test_images/sample_03_large.jpg".encode(encoding="utf-8")
+medium_image3_path = "test_images/sample_03_medium.jpg".encode(encoding="utf-8")
+small_image3_path = "test_images/sample_03_small.jpg".encode(encoding="utf-8")
+test_images=[large_image1_path, medium_image1_path, small_image1_path,
+             large_image2_path, medium_image2_path, small_image2_path,
+             large_image3_path, medium_image3_path, small_image3_path]
+print("starting ffi test")
 #initialize the library
 lib.init()
@@ -15,20 +26,13 @@ lib.init()
 #print(large_image_path)
 #print('\\x'+'\\x'.join('{:02x}'.format(x) for x in large_image_path))
-print("Get hashes for {}", large_image_path)
-print("AHash: {}",lib.ext_get_ahash(large_image_path))
-print("DHash: {}",lib.ext_get_dhash(large_image_path))
-print("PHash: {}",lib.ext_get_phash(large_image_path))
-print("Get hashes for {}", medium_image_path)
-print("AHash: {}",lib.ext_get_ahash(medium_image_path))
-print("DHash: {}",lib.ext_get_dhash(medium_image_path))
-print("PHash: {}",lib.ext_get_phash(medium_image_path))
-print("Get hashes for {}", small_image_path)
-print("AHash: {}",lib.ext_get_ahash(small_image_path))
-print("DHash: {}",lib.ext_get_dhash(small_image_path))
-print("PHash: {}",lib.ext_get_phash(small_image_path))
+for image in test_images:
+    print("Get hashes for {}", image)
+    print("AHash: {}",lib.ext_get_ahash(image) & 0xffffffffffffffff)
+    print("DHash: {}",lib.ext_get_dhash(image) & 0xffffffffffffffff)
+    print("PHash: {}",lib.ext_get_phash(image) & 0xffffffffffffffff)
 # Do cleanup
 #lib.teardown()
+print("ffi test finished")

src/cache.rs (29 changed lines)

@@ -5,9 +5,14 @@
 extern crate image;
 extern crate sha1;
+extern crate flate2;
 use self::image::ImageBuffer;
 use self::sha1::Sha1;
+use self::flate2::Compression;
+use self::flate2::write::ZlibEncoder;
+use self::flate2::read::ZlibDecoder;
+use std::str::FromStr;
 use std::path::Path;
 use std::fs::{File, create_dir_all, remove_dir_all};
 use std::io::{Read, Error, Write};
@@ -78,6 +83,7 @@ pub fn put_matrix_in_cache(path: &Path,
     // Save the file into the cache
     match File::create(&cached_path) {
         Ok(mut file) => {
+            let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default);
             for row in file_contents {
                 let mut row_str = row.iter().fold(String::new(),
                     |acc, &item| acc + &format!("{},", item));
@@ -85,8 +91,13 @@ pub fn put_matrix_in_cache(path: &Path,
                 let desire_len = row_str.len() - 1;
                 row_str.truncate(desire_len);
                 row_str.push_str("\n");
-                file.write(&row_str.into_bytes());
+                compressor.write(&row_str.into_bytes());
             }
+            let compressed_matrix = match compressor.finish() {
+                Ok(data) => data,
+                Err(e) => { println!("Unable to compress matrix data: {}", e); return },
+            };
+            file.write(&compressed_matrix);
             file.flush();
         }
         Err(_) => {}
@@ -144,11 +155,19 @@ pub fn get_matrix_from_cache(path: &Path, size: u32, extension: &str) -> Option<
     // Try to open, if it does, then we can read the image in
     match File::open(&cached_path) {
         Ok(mut file) => {
-            let mut matrix: Vec<Vec<f64>> = Vec::new();
-            let mut matrix_data: Vec<u8> = Vec::new();
-            file.read_to_end(&mut matrix_data);
-            let matrix_data_str = String::from_utf8(matrix_data);
+            let mut compressed_matrix_data: Vec<u8> = Vec::new();
+            let mut decoder = ZlibDecoder::new(&file);
+            let mut matrix_data_str = String::new();
+            match decoder.read_to_string(&mut matrix_data_str) {
+                Ok(_) => {},
+                Err(e) => { println!("Unable to decompress matrix: {}",e); return None }
+            };
             // convert the matrix
+            let matrix: Vec<Vec<f64>> = matrix_data_str.trim().split("\n")
+                .map(|line| line.split(",")
+                    .map(|f| f64::from_str(f).unwrap()).collect())
+                .collect();
             Some(matrix)
         }
         // Don't really care here, it just means an existing cached
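
Taken together, the hunks above change the cache format from plain text to zlib-compressed
text: put_matrix_in_cache now streams each serialized row through a ZlibEncoder and writes
the finished buffer to the cache file, and get_matrix_from_cache wraps the file in a
ZlibDecoder and parses the decompressed text back into a matrix. A standalone round-trip
sketch using the same flate2 0.2 calls that appear in the diff (the sample matrix is made
up, and error handling is collapsed to unwrap for brevity):

    extern crate flate2;

    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use flate2::read::ZlibDecoder;
    use std::io::{Read, Write};

    fn main() {
        // Serialize a small matrix the same way the cache does:
        // comma-separated values, one row per line.
        let matrix = vec![vec![1.0_f64, 2.5], vec![3.25, 4.0]];
        let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default);
        for row in &matrix {
            let mut row_str = row.iter()
                                 .map(|v| v.to_string())
                                 .collect::<Vec<String>>()
                                 .join(",");
            row_str.push('\n');
            compressor.write(row_str.as_bytes()).unwrap();
        }
        let compressed_matrix = compressor.finish().unwrap();

        // Decompress and parse the text back into a Vec<Vec<f64>>.
        let mut decoder = ZlibDecoder::new(&compressed_matrix[..]);
        let mut matrix_data_str = String::new();
        decoder.read_to_string(&mut matrix_data_str).unwrap();
        let parsed: Vec<Vec<f64>> = matrix_data_str.trim().split("\n")
            .map(|line| line.split(",").map(|v| v.parse().unwrap()).collect())
            .collect();

        assert_eq!(parsed, matrix);
        println!("round-tripped {} rows", parsed.len());
    }

A side effect worth noting: cache files written before this change will no longer decode;
the read_to_string call fails, the function returns None, and the matrix is recomputed and
rewritten in the compressed format on the next request.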

src/hash.rs (9 changed lines)

@@ -294,7 +294,14 @@ impl<'a> PerceptualHash for PHash<'a> {
         let height = self.prepared_image.image.height() as usize;
         // Get 2d data to 2d FFT/DFT
+        // Either from the cache or calculate it
+        // Pretty fast already, so caching doesn't make a huge difference
+        // At least compared to opening and processing the images
         let mut data_matrix: Vec<Vec<f64>> = Vec::new();
+        match cache::get_matrix_from_cache(&Path::new(self.prepared_image.orig_path), width as u32, &"dft") {
+            Some(matrix) => data_matrix = matrix,
+            None => {
+                //Preparing the results
         for x in 0..width {
             data_matrix.push(Vec::new());
             for y in 0..height {
@@ -312,6 +319,8 @@ impl<'a> PerceptualHash for PHash<'a> {
                                   width as u32,
                                   &"dft",
                                   &data_matrix);
+            },
+        }
         // Only need the top left quadrant
         let target_width = (width / 4) as usize;
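
The hash.rs change is the "check for cached matrices before calculating them again" part of
the commit message: PHash now asks the cache for the DFT input matrix first and only falls
back to building it from the image, then storing it, on a miss. A minimal sketch of that
lookup-or-compute control flow; the two cache functions below are hypothetical stand-ins
with the signatures shown in the diff, and compute_matrix stands in for the pixel-processing
loop:

    use std::path::Path;

    // Hypothetical stand-ins for the cache calls used in the diff.
    fn get_matrix_from_cache(_path: &Path, _size: u32, _extension: &str) -> Option<Vec<Vec<f64>>> {
        None // pretend the cache is empty
    }

    fn put_matrix_in_cache(_path: &Path, _size: u32, _extension: &str, _matrix: &Vec<Vec<f64>>) {}

    // Illustrative fallback; the real code fills the matrix from image pixels.
    fn compute_matrix(width: usize, height: usize) -> Vec<Vec<f64>> {
        (0..width).map(|x| (0..height).map(|y| (x * y) as f64).collect()).collect()
    }

    fn lookup_or_compute(path: &Path, width: usize, height: usize) -> Vec<Vec<f64>> {
        match get_matrix_from_cache(path, width as u32, "dft") {
            // Cache hit: skip the expensive preparation entirely.
            Some(matrix) => matrix,
            // Cache miss: compute the matrix, store it for next time, return it.
            None => {
                let matrix = compute_matrix(width, height);
                put_matrix_in_cache(path, width as u32, "dft", &matrix);
                matrix
            }
        }
    }

    fn main() {
        let matrix = lookup_or_compute(Path::new("test_images/sample_01_large.jpg"), 8, 8);
        println!("{}x{} matrix", matrix.len(), matrix[0].len());
    }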
