You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

250 lines
7.0 KiB

8 years ago
  1. // Copyright 2016 Drew Short <drew@sothr.com>.
  2. //
  3. // Licensed under the MIT license<LICENSE-MIT or http://opensource.org/licenses/MIT>.
  4. // This file may not be copied, modified, or distributed except according to those terms.
  5. extern crate dft;
  6. extern crate image;
  7. use self::image::FilterType;
  8. use cache::Cache;
  9. use std::f64;
  10. use std::fmt;
  11. use std::path::Path;
  12. mod ahash;
  13. mod dhash;
  14. mod phash;
  15. // Constants //
  16. // Used to get ranges for the precision of rounding floats
  17. // Can round to 1 significant factor of precision
  18. const FLOAT_PRECISION_MAX_1: f64 = f64::MAX / 10_f64;
  19. const FLOAT_PRECISION_MIN_1: f64 = f64::MIN / 10_f64;
  20. // Can round to 2 significant factors of precision
  21. const FLOAT_PRECISION_MAX_2: f64 = f64::MAX / 100_f64;
  22. const FLOAT_PRECISION_MIN_2: f64 = f64::MIN / 100_f64;
  23. // Can round to 3 significant factors of precision
  24. const FLOAT_PRECISION_MAX_3: f64 = f64::MAX / 1000_f64;
  25. const FLOAT_PRECISION_MIN_3: f64 = f64::MIN / 1000_f64;
  26. // Can round to 4 significant factors of precision
  27. const FLOAT_PRECISION_MAX_4: f64 = f64::MAX / 10000_f64;
  28. const FLOAT_PRECISION_MIN_4: f64 = f64::MIN / 10000_f64;
  29. // Can round to 5 significant factors of precision
  30. const FLOAT_PRECISION_MAX_5: f64 = f64::MAX / 100000_f64;
  31. const FLOAT_PRECISION_MIN_5: f64 = f64::MIN / 100000_f64;
  32. // Hamming Distance Similarity Limit //
  33. const HAMMING_DISTANCE_SIMILARITY_LIMIT: u64 = 5u64;
  34. // Structs/Enums //
/**
 * Prepared image that can be used to generate hashes
 *
 * Built by prepare_image / process_image, which pair the path string of the
 * source file with the resized, grayscale image when one is available.
 */
pub struct PreparedImage<'a> {
    // Path of the source image, borrowed from the caller
    orig_path: &'a str,
    // The processed image; None when process_image could not open the file
    image: Option<image::DynamicImage>,
}
  42. /**
  43. * Wraps the various perceptual hashes
  44. */
  45. pub struct PerceptualHashes<'a> {
  46. pub orig_path: &'a str,
  47. pub ahash: u64,
  48. pub dhash: u64,
  49. pub phash: u64,
  50. }
  51. impl<'a> PerceptualHashes<'a> {
  52. pub fn similar(&self, other: &'a PerceptualHashes<'a>) -> bool {
  53. if self.orig_path != other.orig_path
  54. && calculate_hamming_distance(self.ahash, other.ahash)
  55. <= HAMMING_DISTANCE_SIMILARITY_LIMIT
  56. && calculate_hamming_distance(self.dhash, other.dhash)
  57. <= HAMMING_DISTANCE_SIMILARITY_LIMIT
  58. && calculate_hamming_distance(self.phash, other.phash)
  59. <= HAMMING_DISTANCE_SIMILARITY_LIMIT
  60. {
  61. true
  62. } else {
  63. false
  64. }
  65. }
  66. }
  67. /**
  68. * All the supported precision types
  69. *
  70. * Low aims for 32 bit precision
  71. * Medium aims for 64 bit precision
  72. * High aims for 128 bit precision
  73. */
  74. #[allow(dead_code)]
  75. pub enum Precision {
  76. Low,
  77. Medium,
  78. High,
  79. }
  80. // Get the size of the required image
  81. //
  82. impl Precision {
  83. fn get_size(&self) -> u32 {
  84. match *self {
  85. Precision::Low => 4,
  86. Precision::Medium => 8,
  87. Precision::High => 16,
  88. }
  89. }
  90. }
  91. /**
  92. * Types of hashes supported
  93. */
  94. pub enum HashType {
  95. AHash,
  96. DHash,
  97. PHash,
  98. }
  99. impl fmt::Display for HashType {
  100. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  101. match *self {
  102. HashType::AHash => write!(f, "AHash"),
  103. HashType::DHash => write!(f, "DHash"),
  104. HashType::PHash => write!(f, "PHash"),
  105. }
  106. }
  107. }
  108. // Traits //
/**
 * Interface for anything that can produce a 64 bit perceptual hash
 */
pub trait PerceptualHash {
    // Returns the hash as a u64. An optional cache is passed in;
    // NOTE(review): how implementations use the cache is not visible here.
    fn get_hash(&self, cache: &Option<Cache>) -> u64;
}
  112. // Functions //
  113. /**
  114. * Resonsible for parsing a path, converting an image and package it to be
  115. * hashed.
  116. *
  117. * # Arguments
  118. *
  119. * * 'path' - The path to the image requested to be hashed
  120. * * 'size' - The size that the image should be resize to, in the form of size x size
  121. *
  122. * # Returns
  123. *
  124. * A PreparedImage struct with the required information for performing hashing
  125. *
  126. */
  127. pub fn prepare_image<'a>(
  128. path: &'a Path,
  129. hash_type: &HashType,
  130. precision: &Precision,
  131. cache: &Option<Cache>,
  132. ) -> PreparedImage<'a> {
  133. let image_path = path.to_str().unwrap();
  134. let size: u32 = match *hash_type {
  135. HashType::PHash => precision.get_size() * 4,
  136. _ => precision.get_size(),
  137. };
  138. // Check if we have the already converted image in a cache and use that if possible.
  139. match *cache {
  140. Some(ref c) => {
  141. match c.get_image_from_cache(&path, size) {
  142. Some(image) => PreparedImage {
  143. orig_path: &*image_path,
  144. image: Some(image),
  145. },
  146. None => {
  147. let processed_image = process_image(&image_path, size);
  148. // Oh, and save it in a cache
  149. match processed_image.image {
  150. Some(ref image) => {
  151. match c.put_image_in_cache(&path, size, &image) {
  152. Ok(_) => {}
  153. Err(e) => println!("Unable to store image in cache. {}", e),
  154. };
  155. }
  156. None => {}
  157. };
  158. processed_image
  159. }
  160. }
  161. }
  162. None => process_image(&image_path, size),
  163. }
  164. }
  165. /**
  166. * Turn the image into something we can work with
  167. */
  168. fn process_image<'a>(image_path: &'a str, size: u32) -> PreparedImage<'a> {
  169. // Otherwise let's do that work now and store it.
  170. // println!("Path: {}", image_path);
  171. let image = match image::open(Path::new(image_path)) {
  172. Ok(image) => {
  173. let small_image = image.resize_exact(size, size, FilterType::Lanczos3);
  174. Some(small_image.grayscale())
  175. }
  176. Err(e) => {
  177. println!("Error Processing Image [{}]: {} ", image_path, e);
  178. None
  179. }
  180. };
  181. PreparedImage {
  182. orig_path: &*image_path,
  183. image,
  184. }
  185. }
  186. /**
  187. * Get a specific HashType hash
  188. */
  189. pub fn get_perceptual_hash<'a>(
  190. path: &'a Path,
  191. precision: &Precision,
  192. hash_type: &HashType,
  193. cache: &Option<Cache>,
  194. ) -> u64 {
  195. match *hash_type {
  196. HashType::AHash => ahash::AHash::new(&path, &precision, &cache).get_hash(&cache),
  197. HashType::DHash => dhash::DHash::new(&path, &precision, &cache).get_hash(&cache),
  198. HashType::PHash => phash::PHash::new(&path, &precision, &cache).get_hash(&cache),
  199. }
  200. }
  201. /**
  202. * Get all perceptual hashes for an image
  203. */
  204. pub fn get_perceptual_hashes<'a>(
  205. path: &'a Path,
  206. precision: &Precision,
  207. cache: &Option<Cache>,
  208. ) -> PerceptualHashes<'a> {
  209. let image_path = path.to_str().unwrap();
  210. let ahash = ahash::AHash::new(&path, &precision, &cache).get_hash(&cache);
  211. let dhash = dhash::DHash::new(&path, &precision, &cache).get_hash(&cache);
  212. let phash = phash::PHash::new(&path, &precision, &cache).get_hash(&cache);
  213. PerceptualHashes {
  214. orig_path: &*image_path,
  215. ahash: ahash,
  216. dhash: dhash,
  217. phash: phash,
  218. }
  219. }
  220. /**
  221. * Calculate the number of bits different between two hashes
  222. * Add to the PerceptualHashTrait
  223. */
  224. pub fn calculate_hamming_distance(hash1: u64, hash2: u64) -> u64 {
  225. // The binary xor of the two hashes should give us a number representing
  226. // the differences between the two hashes. All that's left is to count
  227. // the number of 1's in the difference to determine the hamming distance
  228. (hash1 ^ hash2).count_ones() as u64
  229. }