Browse Source

Moved the cache into a struct

develop
Drew Short 9 years ago
parent
commit
b543cb397e
  1. 2
      Cargo.toml
  2. 163
      src/cache.rs
  3. 42
      src/hash.rs
  4. 87
      src/lib.rs

2
Cargo.toml

@ -1,6 +1,6 @@
[package]
name = "pihash"
version = "0.2.3"
version = "0.2.4"
authors = ["Drew Short <warrick@sothr.com>"]
description = "A simple library for generating perceptual hashes for images and comparing images based on their perceptual hashes."
repository = "https://github.com/warricksothr/Perceptual-Image-Hashing/"

163
src/cache.rs

@ -19,22 +19,42 @@ use std::io::{Read, Error, Write};
use std::option::Option;
use std::result::Result;
const CACHE_DIR: &'static str = "./.hash_cache";
const CACHE_FILE_EXT: &'static str = "png";
pub const CACHE_DIR: &'static str = "./.hash_cache";
const CACHED_IMAGE_EXT: &'static str = "png";
const CACHED_MATRIX_EXT: &'static str = "dft";
// Caching version information
const CACHE_VERSION: u32 = 1;
// Creates the required directories
pub fn prep_cache() -> Result<(), Error> {
create_dir_all(CACHE_DIR)
/**
 * Structure to hold implementation of the cache
 *
 * A `Cache` only borrows the path of its root directory; cached images and
 * matrices are stored in sub-directories underneath that root. The value is
 * a thin wrapper around a string slice, so it is cheap to copy and can be
 * printed for diagnostics.
 */
#[derive(Debug, Clone, Copy)]
pub struct Cache<'a> {
    /** Root directory of the cache on disk */
    pub cache_dir: &'a str,
}
pub fn clear_cache() -> Result<(), Error> {
remove_dir_all(CACHE_DIR)
impl<'a> Default for Cache<'a> {
    /**
     * Build a cache rooted at the default directory, CACHE_DIR
     */
    fn default() -> Self {
        Cache { cache_dir: CACHE_DIR }
    }
}
/**
impl<'a> Cache<'a> {
/**
 * Create the required directories for the cache
 *
 * Creates the cache root (and any missing parent directories). Any I/O
 * error reported by the filesystem is handed back to the caller.
 */
pub fn init(&self) -> Result<(), Error> {
    let root = self.cache_dir;
    create_dir_all(root)
}
/**
 * Clean the cache directory completely
 *
 * Removes the cache root together with everything stored beneath it. Any
 * I/O error reported by the filesystem is handed back to the caller.
 */
pub fn clean(&self) -> Result<(), Error> {
    let root = self.cache_dir;
    remove_dir_all(root)
}
/**
* Get the hash of the desired file and return it as a hex string
*/
fn get_file_hash(path: &Path) -> Result<String, Error> {
pub fn get_file_hash(&self, path: &Path) -> Result<String, Error> {
let mut source = try!(File::open(&path));
let mut buf: Vec<u8> = Vec::new();
try!(source.read_to_end(&mut buf));
@ -42,24 +62,28 @@ fn get_file_hash(path: &Path) -> Result<String, Error> {
sha1.update(&buf);
// Return the hex result of the hash
Ok(sha1.hexdigest())
}
}
/**
/**
* Put an image buffer in the cache
*/
pub fn put_image_in_cache(path: &Path,
pub fn put_image_in_cache(&self,
path: &Path,
size: u32,
image: &ImageBuffer<image::Luma<u8>, Vec<u8>>)
-> Result<bool, Error> {
let hash = get_file_hash(&path);
let hash = self.get_file_hash(&path);
match hash {
Ok(sha1) => {
let cache_path_str = format!("{}/{}x{}_{}.{}",
let cache_path_str = format!("{}/image/{}x{}/{}.{}",
CACHE_DIR,
size,
size,
sha1,
CACHE_FILE_EXT);
CACHED_IMAGE_EXT);
let cache_dir_str = format!("{}/image/{}x{}", self.cache_dir, size, size);
match create_dir_all(cache_dir_str) {
Ok(_) => {
let cached_path = Path::new(&cache_path_str);
// Save the file into the cache
match image.save(cached_path) {
@ -69,6 +93,9 @@ pub fn put_image_in_cache(path: &Path,
return Err(e);
}
}
},
Err(e) => println!("Error: {}", e),
}
}
Err(e) => {
println!("Error: {}", e);
@ -76,20 +103,59 @@ pub fn put_image_in_cache(path: &Path,
}
}
Ok(true)
}
}
/**
/**
 * Get an image buffer out of the cache
 *
 * The cache entry is looked up under
 * `<cache_dir>/image/<size>x<size>/<sha1 of the source file>.<ext>`.
 *
 * Returns `None` when the source file cannot be hashed (the error is
 * printed), when no cached entry exists, or when the cached file cannot be
 * read or decoded. A damaged cache entry is therefore treated as a plain
 * cache miss instead of aborting the process.
 */
pub fn get_image_from_cache(&self,
                            path: &Path,
                            size: u32)
                            -> Option<ImageBuffer<image::Luma<u8>, Vec<u8>>> {
    let sha1 = match self.get_file_hash(path) {
        Ok(sha1) => sha1,
        Err(e) => {
            println!("Error: {}", e);
            return None;
        }
    };
    // Location of the cached copy for this file content and size
    let cache_path_str = format!("{}/image/{}x{}/{}.{}",
                                 self.cache_dir,
                                 size,
                                 size,
                                 sha1,
                                 CACHED_IMAGE_EXT);
    let cached_path = Path::new(&cache_path_str);
    // Don't really care why loading failed: a missing, unreadable or
    // undecodable file all just mean there is no usable cached image.
    image::open(cached_path).ok().map(|image| image.to_luma())
}
/**
* Expects a slice of slices that represents lines in the file
*/
pub fn put_matrix_in_cache(path: &Path,
pub fn put_matrix_in_cache(&self,
path: &Path,
size: u32,
extension: &str,
file_contents: &Vec<Vec<f64>>)
-> Result<bool, Error> {
let hash = get_file_hash(&path);
let hash = self.get_file_hash(&path);
match hash {
Ok(sha1) => {
let cache_path_str = format!("{}/{}x{}_{}.{}", CACHE_DIR, size, size, sha1, extension);
let cache_path_str = format!("{}/matrix/{}x{}/{}.{}", self.cache_dir, size, size, sha1, CACHED_MATRIX_EXT);
let cache_dir_str = format!("{}/matrix/{}x{}", self.cache_dir, size, size);
match create_dir_all(cache_dir_str) {
Ok(_) => {
let cached_path = Path::new(&cache_path_str);
// Save the file into the cache
match File::create(&cached_path) {
@ -118,6 +184,9 @@ pub fn put_matrix_in_cache(path: &Path,
return Err(e);
}
}
},
Err(e) => println!("Error: {}", e),
}
}
Err(e) => {
println!("Error: {}", e);
@ -125,52 +194,20 @@ pub fn put_matrix_in_cache(path: &Path,
}
}
Ok(true)
}
/**
* Get an image buffer out of the cache
*/
pub fn get_image_from_cache(path: &Path,
size: u32)
-> Option<ImageBuffer<image::Luma<u8>, Vec<u8>>> {
let hash = get_file_hash(&path);
match hash {
Ok(sha1) => {
// Check if the file exists in the cache
let cache_path_str = format!("{}/{}x{}_{}.{}",
CACHE_DIR,
size,
size,
sha1,
CACHE_FILE_EXT);
let cached_path = Path::new(&cache_path_str);
// Try to open, if it does, then we can read the image in
match File::open(&cached_path) {
Ok(_) => {
let image = image::open(&cached_path).unwrap();
Some(image.to_luma())
}
// Don't really care here, it just means an existing cached
// file doesn't exist, or can't be read.
Err(_) => None,
}
}
Err(e) => {
println!("Error: {}", e);
None
}
}
}
/**
/**
* Get a matrix out of the cache
*/
pub fn get_matrix_from_cache(path: &Path, size: u32, extension: &str) -> Option<Vec<Vec<f64>>> {
let hash = get_file_hash(&path);
pub fn get_matrix_from_cache(&self,
path: &Path,
size: u32)
-> Option<Vec<Vec<f64>>> {
let hash = self.get_file_hash(&path);
match hash {
Ok(sha1) => {
// Check if the file exists in the cache
let cache_path_str = format!("{}/{}x{}_{}.{}", CACHE_DIR, size, size, sha1, extension);
let cache_path_str = format!("{}/matrix/{}x{}/{}.{}", CACHE_DIR, size, size, sha1, CACHED_MATRIX_EXT);
let cached_path = Path::new(&cache_path_str);
// Try to open the cached file; if that succeeds, we can read the matrix in
match File::open(&cached_path) {
@ -208,15 +245,15 @@ pub fn get_matrix_from_cache(path: &Path, size: u32, extension: &str) -> Option<
None
}
}
}
}
#[test]
fn test_get_file_hash() {
let target = "test_images/sample_01_large.jpg";
let target_path = Path::new(target);
let hash = get_file_hash(&target_path);
let cache: Cache = Default::default();
let hash = cache.get_file_hash(&target_path);
match hash {
Ok(v) => {
println!("Hash: {}", v);

42
src/hash.rs

@ -12,7 +12,7 @@ use std::path::Path;
use std::f64;
use self::image::{GenericImage, Pixel, FilterType};
use self::dft::Transform;
use cache;
use cache::Cache;
// Used to get ranges for the precision of rounding floats
// Can round to 1 significant factor of precision
@ -37,6 +37,7 @@ const FLOAT_PRECISION_MIN_5: f64 = f64::MIN / 100000_f64;
pub struct PreparedImage<'a> {
orig_path: &'a str,
image: image::ImageBuffer<image::Luma<u8>, Vec<u8>>,
cache: &'a Cache<'a>,
}
/**
@ -56,6 +57,7 @@ pub struct PerceptualHashes<'a> {
* Medium aims for 64 bit precision
* High aims for 128 bit precision
*/
#[allow(dead_code)]
pub enum Precision {
Low,
Medium,
@ -99,7 +101,8 @@ pub enum HashType {
*/
pub fn prepare_image<'a>(path: &'a Path,
hash_type: &HashType,
precision: &Precision)
precision: &Precision,
cache: &'a Cache<'a>)
-> PreparedImage<'a> {
let image_path = path.to_str().unwrap();
let size: u32 = match *hash_type {
@ -107,11 +110,12 @@ pub fn prepare_image<'a>(path: &'a Path,
_ => precision.get_size(),
};
// Check if we have the already converted image in a cache and use that if possible.
match cache::get_image_from_cache(&path, size) {
match cache.get_image_from_cache(&path, size) {
Some(image) => {
PreparedImage {
orig_path: &*image_path,
image: image,
cache: &cache
}
}
None => {
@ -119,13 +123,14 @@ pub fn prepare_image<'a>(path: &'a Path,
let image = image::open(path).unwrap();
let small_image = image.resize_exact(size, size, FilterType::Lanczos3);
let grey_image = small_image.to_luma();
match cache::put_image_in_cache(&path, size, &grey_image) {
match cache.put_image_in_cache(&path, size, &grey_image) {
Ok(_) => {}
Err(e) => println!("Unable to store image in cache. {}", e),
};
PreparedImage {
orig_path: &*image_path,
image: grey_image,
cache: &cache,
}
}
}
@ -134,11 +139,11 @@ pub fn prepare_image<'a>(path: &'a Path,
/**
* Get all perceptual hashes for an image
*/
pub fn get_perceptual_hashes<'a>(path: &'a Path, precision: &Precision) -> PerceptualHashes<'a> {
pub fn get_perceptual_hashes<'a>(path: &'a Path, precision: &Precision, cache: &Cache) -> PerceptualHashes<'a> {
let image_path = path.to_str().unwrap();
let ahash = AHash::new(&path, &precision).get_hash();
let dhash = DHash::new(&path, &precision).get_hash();
let phash = PHash::new(&path, &precision).get_hash();
let ahash = AHash::new(&path, &precision, &cache).get_hash();
let dhash = DHash::new(&path, &precision, &cache).get_hash();
let phash = PHash::new(&path, &precision, &cache).get_hash();
PerceptualHashes {
orig_path: &*image_path,
ahash: ahash,
@ -149,6 +154,7 @@ pub fn get_perceptual_hashes<'a>(path: &'a Path, precision: &Precision) -> Perce
/**
* Calculate the number of bits different between two hashes
* Add to the PerceptualHashTrait
*/
pub fn calculate_hamming_distance(hash1: u64, hash2: u64) -> u64 {
// The binary xor of the two hashes should give us a number representing
@ -175,8 +181,8 @@ pub struct AHash<'a> {
}
impl<'a> AHash<'a> {
pub fn new(path: &'a Path, precision: &Precision) -> Self {
AHash { prepared_image: Box::new(prepare_image(&path, &HashType::Ahash, &precision)) }
/**
 * Build an AHash for the image at `path`, letting prepare_image load or
 * store the prepared image through `cache`
 */
pub fn new(path: &'a Path, precision: &Precision, cache: &'a Cache) -> Self {
    let prepared = prepare_image(path, &HashType::Ahash, precision, cache);
    AHash { prepared_image: Box::new(prepared) }
}
}
@ -226,8 +232,8 @@ pub struct DHash<'a> {
}
impl<'a> DHash<'a> {
pub fn new(path: &'a Path, precision: &Precision) -> Self {
DHash { prepared_image: Box::new(prepare_image(&path, &HashType::Dhash, &precision)) }
/**
 * Build a DHash for the image at `path`, letting prepare_image load or
 * store the prepared image through `cache`
 */
pub fn new(path: &'a Path, precision: &Precision, cache: &'a Cache) -> Self {
    let prepared = prepare_image(path, &HashType::Dhash, precision, cache);
    DHash { prepared_image: Box::new(prepared) }
}
}
@ -278,8 +284,8 @@ pub struct PHash<'a> {
}
impl<'a> PHash<'a> {
pub fn new(path: &'a Path, precision: &Precision) -> Self {
PHash { prepared_image: Box::new(prepare_image(&path, &HashType::Phash, &precision)) }
/**
 * Build a PHash for the image at `path`, letting prepare_image load or
 * store the prepared image through `cache`
 */
pub fn new(path: &'a Path, precision: &Precision, cache: &'a Cache) -> Self {
    let prepared = prepare_image(path, &HashType::Phash, precision, cache);
    PHash { prepared_image: Box::new(prepared) }
}
}
@ -301,9 +307,8 @@ impl<'a> PerceptualHash for PHash<'a> {
// Pretty fast already, so caching doesn't make a huge difference
// At least compared to opening and processing the images
let mut data_matrix: Vec<Vec<f64>> = Vec::new();
match cache::get_matrix_from_cache(&Path::new(self.prepared_image.orig_path),
width as u32,
&"dft") {
match self.prepared_image.cache.get_matrix_from_cache(&Path::new(self.prepared_image.orig_path),
width as u32) {
Some(matrix) => data_matrix = matrix,
None => {
// Preparing the results
@ -323,9 +328,8 @@ impl<'a> PerceptualHash for PHash<'a> {
// Perform the 2D DFT operation on our matrix
calculate_2d_dft(&mut data_matrix);
// Store this DFT in the cache
match cache::put_matrix_in_cache(&Path::new(self.prepared_image.orig_path),
match self.prepared_image.cache.put_matrix_in_cache(&Path::new(self.prepared_image.orig_path),
width as u32,
&"dft",
&data_matrix) {
Ok(_) => {}
Err(e) => println!("Unable to store matrix in cache. {}", e),

87
src/lib.rs

@ -11,6 +11,9 @@ extern crate libc;
use std::path::Path;
use hash::PerceptualHash;
use std::ffi::CStr;
use cache::Cache;
static LIB_CACHE: Cache<'static> = Cache { cache_dir: cache::CACHE_DIR };
/**
* Prepare the library for work.
@ -19,7 +22,7 @@ use std::ffi::CStr;
*/
#[no_mangle]
pub extern "C" fn init() {
match cache::prep_cache() {
match LIB_CACHE.init() {
Ok(_) => {}
Err(e) => println!("Error: {}", e),
}
@ -30,27 +33,27 @@ pub extern "C" fn init() {
*/
#[no_mangle]
pub extern "C" fn teardown() {
match cache::clear_cache() {
match LIB_CACHE.clean() {
Ok(_) => {}
Err(e) => println!("Error: {}", e),
}
}
pub fn get_phashes(path: &Path) -> hash::PerceptualHashes {
hash::get_perceptual_hashes(path, &hash::Precision::Medium)
hash::get_perceptual_hashes(path, &hash::Precision::Medium, &LIB_CACHE)
}
pub fn get_ahash(path: &Path) -> u64 {
hash::AHash::new(&path, &hash::Precision::Medium).get_hash()
hash::AHash::new(&path, &hash::Precision::Medium, &LIB_CACHE).get_hash()
}
pub fn get_dhash(path: &Path) -> u64 {
hash::DHash::new(&path, &hash::Precision::Medium).get_hash()
hash::DHash::new(&path, &hash::Precision::Medium, &LIB_CACHE).get_hash()
}
pub fn get_phash(path: &Path) -> u64 {
hash::PHash::new(&path, &hash::Precision::Medium).get_hash()
hash::PHash::new(&path, &hash::Precision::Medium, &LIB_CACHE).get_hash()
}
pub fn get_hamming_distance(hash1: u64, hash2: u64) -> u64 {
@ -220,12 +223,12 @@ mod tests {
// Sample_01 tests
test_imageset_hash(&hash::AHash::new(path::Path::new("./test_images/sample_01_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_01_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_01_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
857051991849750,
857051991849750,
857051991849750,
@ -235,12 +238,12 @@ mod tests {
// Sample_02 tests
test_imageset_hash(&hash::AHash::new(path::Path::new("./test_images/sample_02_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_02_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_02_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
18446744073441116160,
18446744073441116160,
18446744073441116160,
@ -249,12 +252,12 @@ mod tests {
0u64);
// Sample_03 tests
test_imageset_hash(&hash::AHash::new(path::Path::new("./test_images/sample_03_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_03_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_03_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
135670932300497406,
135670932300497406,
135670932300497406,
@ -264,12 +267,12 @@ mod tests {
// Sample_04 tests
test_imageset_hash(&hash::AHash::new(path::Path::new("./test_images/sample_04_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_04_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::AHash::new(path::Path::new("./test_images/sample_04_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
18446460933225054208,
18446460933090836480,
18446460933090836480,
@ -288,12 +291,12 @@ mod tests {
// Sample_01 tests
test_imageset_hash(&hash::DHash::new(path::Path::new("./test_images/sample_01_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_01_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_01_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
7937395827556495926,
7937395827556495926,
7939647627370181174,
@ -303,12 +306,12 @@ mod tests {
// Sample_02 tests
test_imageset_hash(&hash::DHash::new(path::Path::new("./test_images/sample_02_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_02_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_02_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
11009829669713008949,
11009829670249879861,
11009829669713008949,
@ -317,12 +320,12 @@ mod tests {
1u64);
// Sample_03 tests
test_imageset_hash(&hash::DHash::new(path::Path::new("./test_images/sample_03_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_03_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_03_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
225528496439353286,
225528496439353286,
226654396346195908,
@ -332,12 +335,12 @@ mod tests {
// Sample_04 tests
test_imageset_hash(&hash::DHash::new(path::Path::new("./test_images/sample_04_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_04_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::DHash::new(path::Path::new("./test_images/sample_04_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
14620651386429567209,
14620651386429567209,
14620651386429567209,
@ -356,12 +359,12 @@ mod tests {
// Sample_01 tests
test_imageset_hash(&hash::PHash::new(path::Path::new("./test_images/sample_01_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_01_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_01_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
72357778504597504,
72357778504597504,
72357778504597504,
@ -371,12 +374,12 @@ mod tests {
// Sample_02 tests
test_imageset_hash(&hash::PHash::new(path::Path::new("./test_images/sample_02_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_02_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_02_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
5332332327550844928,
5332332327550844928,
5332332327550844928,
@ -385,12 +388,12 @@ mod tests {
0u64);
// Sample_03 tests
test_imageset_hash(&hash::PHash::new(path::Path::new("./test_images/sample_03_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_03_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_03_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
6917529027641081856,
6917529027641081856,
6917529027641081856,
@ -400,12 +403,12 @@ mod tests {
// Sample_04 tests
test_imageset_hash(&hash::PHash::new(path::Path::new("./test_images/sample_04_large.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_04_medium.\
jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
&hash::PHash::new(path::Path::new("./test_images/sample_04_small.jpg"),
&hash::Precision::Medium),
&hash::Precision::Medium, &super::LIB_CACHE),
10997931646002397184,
10997931646002397184,
11142046834078253056,

Loading…
Cancel
Save