Browse Source

Address remaining review comments from PR #8539

Security:
- Remove unused is_write_active field from Guard

gRPC:
- Update needle index after write_needle_blob via new put_needle_index()
- Return unimplemented for set_state (no state persistence yet)
- Add TODO for DiskStatus total/used metrics

Storage:
- Track overwritten needle bytes in deletion_byte_count for garbage_level
- Fix sync order: flush dat file before idx for crash safety
- Reject duplicate volume IDs in mount_volume

Erasure coding:
- Add OOM guard (1 GiB limit) for encode_one_batch allocation
- Add non-Unix read fallback in encode_one_batch
- Validate shard ID bounds in add/remove/has_shard_id
rust-volume-server
Chris Lu 7 days ago
parent
commit
089d94e5ff
  1. 2
      seaweed-volume/src/security.rs
  2. 12
      seaweed-volume/src/server/grpc_server.rs
  3. 23
      seaweed-volume/src/storage/erasure_coding/ec_encoder.rs
  4. 15
      seaweed-volume/src/storage/erasure_coding/ec_shard.rs
  5. 2
      seaweed-volume/src/storage/needle_map.rs
  6. 3
      seaweed-volume/src/storage/store.rs
  7. 23
      seaweed-volume/src/storage/volume.rs

2
seaweed-volume/src/security.rs

@@ -113,7 +113,6 @@ pub struct Guard {
pub expires_after_sec: i64, pub expires_after_sec: i64,
pub read_signing_key: SigningKey, pub read_signing_key: SigningKey,
pub read_expires_after_sec: i64, pub read_expires_after_sec: i64,
is_write_active: bool,
} }
impl Guard { impl Guard {
@@ -127,7 +126,6 @@ impl Guard {
let mut guard = Guard { let mut guard = Guard {
whitelist_ips: HashSet::new(), whitelist_ips: HashSet::new(),
whitelist_cidrs: Vec::new(), whitelist_cidrs: Vec::new(),
is_write_active: !signing_key.is_empty(),
signing_key, signing_key,
expires_after_sec, expires_after_sec,
read_signing_key, read_signing_key,

12
seaweed-volume/src/server/grpc_server.rs

@@ -298,6 +298,7 @@ impl VolumeServer for VolumeGrpcService {
&self, &self,
request: Request<volume_server_pb::SetStateRequest>, request: Request<volume_server_pb::SetStateRequest>,
) -> Result<Response<volume_server_pb::SetStateResponse>, Status> { ) -> Result<Response<volume_server_pb::SetStateResponse>, Status> {
// TODO: Persist state changes. Currently echoes back the request state.
let req = request.into_inner(); let req = request.into_inner();
Ok(Response::new(volume_server_pb::SetStateResponse { Ok(Response::new(volume_server_pb::SetStateResponse {
state: req.state, state: req.state,
@@ -414,6 +415,12 @@ impl VolumeServer for VolumeGrpcService {
vol.write_needle_blob(dat_size, &req.needle_blob) vol.write_needle_blob(dat_size, &req.needle_blob)
.map_err(|e| Status::internal(e.to_string()))?; .map_err(|e| Status::internal(e.to_string()))?;
// Update the needle index so the written blob is discoverable
let needle_id = NeedleId(req.needle_id);
let size = Size(req.size);
vol.put_needle_index(needle_id, Offset::from_actual_offset(dat_size), size)
.map_err(|e| Status::internal(e.to_string()))?;
Ok(Response::new(volume_server_pb::WriteNeedleBlobResponse {})) Ok(Response::new(volume_server_pb::WriteNeedleBlobResponse {}))
} }
@@ -556,11 +563,14 @@ impl VolumeServer for VolumeGrpcService {
let mut disk_statuses = Vec::new(); let mut disk_statuses = Vec::new();
for loc in &store.locations { for loc in &store.locations {
let free = loc.available_space.load(std::sync::atomic::Ordering::Relaxed);
// TODO: DiskLocation does not yet track total disk size.
// Once implemented, compute all/used/percent from real values.
disk_statuses.push(volume_server_pb::DiskStatus { disk_statuses.push(volume_server_pb::DiskStatus {
dir: loc.directory.clone(), dir: loc.directory.clone(),
all: 0, all: 0,
used: 0, used: 0,
free: loc.available_space.load(std::sync::atomic::Ordering::Relaxed),
free,
percent_free: 0.0, percent_free: 0.0,
percent_used: 0.0, percent_used: 0.0,
disk_type: loc.disk_type.to_string(), disk_type: loc.disk_type.to_string(),

23
seaweed-volume/src/storage/erasure_coding/ec_encoder.rs

@@ -132,6 +132,22 @@ fn encode_one_batch(
rs: &ReedSolomon, rs: &ReedSolomon,
shards: &mut [EcVolumeShard], shards: &mut [EcVolumeShard],
) -> io::Result<()> { ) -> io::Result<()> {
// Each batch allocates block_size * TOTAL_SHARDS_COUNT bytes.
// With large blocks (1 GiB) this is 14 GiB -- guard against OOM.
let total_alloc = block_size.checked_mul(TOTAL_SHARDS_COUNT).ok_or_else(|| {
io::Error::new(io::ErrorKind::InvalidInput, "block_size * shard count overflows usize")
})?;
const MAX_BATCH_ALLOC: usize = 1024 * 1024 * 1024; // 1 GiB safety limit
if total_alloc > MAX_BATCH_ALLOC {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!(
"batch allocation too large ({} bytes, limit {} bytes); block_size={} shards={}",
total_alloc, MAX_BATCH_ALLOC, block_size, TOTAL_SHARDS_COUNT,
),
));
}
// Allocate buffers for all shards // Allocate buffers for all shards
let mut buffers: Vec<Vec<u8>> = (0..TOTAL_SHARDS_COUNT) let mut buffers: Vec<Vec<u8>> = (0..TOTAL_SHARDS_COUNT)
.map(|_| vec![0u8; block_size]) .map(|_| vec![0u8; block_size])
@@ -146,6 +162,13 @@ fn encode_one_batch(
use std::os::unix::fs::FileExt; use std::os::unix::fs::FileExt;
dat_file.read_at(&mut buffers[i], read_offset)?; dat_file.read_at(&mut buffers[i], read_offset)?;
} }
#[cfg(not(unix))]
{
let mut f = dat_file.try_clone()?;
f.seek(SeekFrom::Start(read_offset))?;
f.read(&mut buffers[i])?;
}
} }
// Encode parity shards // Encode parity shards

15
seaweed-volume/src/storage/erasure_coding/ec_shard.rs

@@ -126,14 +126,29 @@ pub struct ShardBits(pub u32);
impl ShardBits { impl ShardBits {
pub fn add_shard_id(&mut self, id: ShardId) { pub fn add_shard_id(&mut self, id: ShardId) {
assert!(
(id as usize) < TOTAL_SHARDS_COUNT,
"shard id {} out of bounds (max {})",
id,
TOTAL_SHARDS_COUNT - 1,
);
self.0 |= 1 << id; self.0 |= 1 << id;
} }
pub fn remove_shard_id(&mut self, id: ShardId) { pub fn remove_shard_id(&mut self, id: ShardId) {
assert!(
(id as usize) < TOTAL_SHARDS_COUNT,
"shard id {} out of bounds (max {})",
id,
TOTAL_SHARDS_COUNT - 1,
);
self.0 &= !(1 << id); self.0 &= !(1 << id);
} }
pub fn has_shard_id(&self, id: ShardId) -> bool { pub fn has_shard_id(&self, id: ShardId) -> bool {
if (id as usize) >= TOTAL_SHARDS_COUNT {
return false;
}
self.0 & (1 << id) != 0 self.0 & (1 << id) != 0
} }

2
seaweed-volume/src/storage/needle_map.rs

@@ -45,6 +45,8 @@ impl NeedleMapMetric {
if let Some(old_val) = old { if let Some(old_val) = old {
if old_val.size.is_valid() { if old_val.size.is_valid() {
self.file_byte_count.fetch_sub(old_val.size.0 as u64, Ordering::Relaxed); self.file_byte_count.fetch_sub(old_val.size.0 as u64, Ordering::Relaxed);
// Track overwritten bytes as garbage for compaction (garbage_level)
self.deletion_byte_count.fetch_add(old_val.size.0 as u64, Ordering::Relaxed);
} }
} }
} }

3
seaweed-volume/src/storage/store.rs

@@ -159,6 +159,9 @@ impl Store {
collection: &str, collection: &str,
disk_type: DiskType, disk_type: DiskType,
) -> Result<(), VolumeError> { ) -> Result<(), VolumeError> {
if self.find_volume(vid).is_some() {
return Err(VolumeError::AlreadyExists);
}
// Find the location where the .dat file exists // Find the location where the .dat file exists
for loc in &mut self.locations { for loc in &mut self.locations {
if &loc.disk_type != &disk_type { if &loc.disk_type != &disk_type {

23
seaweed-volume/src/storage/volume.rs

@@ -621,6 +621,15 @@ impl Volume {
self.no_write_or_delete || self.no_write_can_delete self.no_write_or_delete || self.no_write_can_delete
} }
/// Insert or update a needle index entry (for low-level blob writes).
pub fn put_needle_index(&mut self, key: NeedleId, offset: Offset, size: Size) -> Result<(), VolumeError> {
if let Some(ref mut nm) = self.nm {
nm.put(key, offset, size)
.map_err(VolumeError::Io)?;
}
Ok(())
}
/// Mark this volume as read-only (no writes or deletes). /// Mark this volume as read-only (no writes or deletes).
pub fn set_read_only(&mut self) { pub fn set_read_only(&mut self) {
self.no_write_or_delete = true; self.no_write_or_delete = true;
@@ -684,24 +693,24 @@ impl Volume {
// ---- Sync / Close ---- // ---- Sync / Close ----
pub fn sync_to_disk(&mut self) -> io::Result<()> { pub fn sync_to_disk(&mut self) -> io::Result<()> {
if let Some(ref nm) = self.nm {
nm.sync()?;
}
if let Some(ref dat_file) = self.dat_file { if let Some(ref dat_file) = self.dat_file {
dat_file.sync_all()?; dat_file.sync_all()?;
} }
if let Some(ref nm) = self.nm {
nm.sync()?;
}
Ok(()) Ok(())
} }
pub fn close(&mut self) { pub fn close(&mut self) {
if let Some(ref nm) = self.nm {
let _ = nm.sync();
}
self.nm = None;
if let Some(ref dat_file) = self.dat_file { if let Some(ref dat_file) = self.dat_file {
let _ = dat_file.sync_all(); let _ = dat_file.sync_all();
} }
self.dat_file = None; self.dat_file = None;
if let Some(ref nm) = self.nm {
let _ = nm.sync();
}
self.nm = None;
} }
/// Remove all volume files from disk. /// Remove all volume files from disk.

Loading…
Cancel
Save