Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(services/s3): Add more detect logic for detect_region #2645

Merged
merged 1 commit into from
Jul 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 122 additions & 30 deletions core/src/services/s3/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use base64::Engine;
use bytes::Buf;
use http::StatusCode;
use log::debug;
use log::warn;
use md5::Digest;
use md5::Md5;
use once_cell::sync::Lazy;
Expand Down Expand Up @@ -56,6 +57,7 @@ static ENDPOINT_TEMPLATES: Lazy<HashMap<&'static str, &'static str>> = Lazy::new

const DEFAULT_WRITE_MIN_SIZE: usize = 8 * 1024 * 1024;
const DEFAULT_BATCH_MAX_OPERATIONS: usize = 1000;

/// AWS S3 and compatible services (including MinIO, DigitalOcean Spaces, Tencent Cloud Object Storage (COS) and so on) support.
/// For more information about s3-compatible services, refer to [Compatible Services](#compatible-services).
#[doc = include_str!("docs.md")]
Expand Down Expand Up @@ -518,20 +520,42 @@ impl S3Builder {
self
}

/// a helper function to make it easier to find region
/// Reference: [Amazon S3 HeadBucket API](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/API/API_HeadBucket.html)
/// Detect region of S3 bucket.
///
/// # Args
///
/// endpoint: the endpoint of S3 service
/// - endpoint: the endpoint of S3 service
/// - bucket: the bucket of S3 service
///
/// bucket: the bucket of S3 service
/// # Return
/// if get the region of given inputs, return Some(region)
/// else return None
///
/// # Usage
/// let b = S3Builder::default();
/// let region = b.detect_region("https://s3.amazonaws.com", "buckets").await;
/// - `Some(region)` means we detect the region successfully
/// - `None` means we can't detect the region or an error occurred.
///
/// # Notes
///
/// We will try to detect region by the following methods.
///
/// - Match endpoint with given rules to get region
/// - Cloudflare R2
/// - AWS S3
/// - Aliyun OSS
/// - Send a `HEAD` request to endpoint with bucket name to get `x-amz-bucket-region`.
///
/// # Examples
///
/// ```no_run
/// use opendal::services::S3;
///
/// # async fn example() {
/// let builder = S3::default();
/// let region: Option<String> = builder.detect_region("https://s3.amazonaws.com", "example").await;
/// # }
/// ```
///
/// # Reference
///
/// - [Amazon S3 HeadBucket API](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/API/API_HeadBucket.html)
pub async fn detect_region(&self, endpoint: &str, bucket: &str) -> Option<String> {
let mut endpoint = if endpoint.starts_with("http") {
endpoint.to_string()
Expand All @@ -540,24 +564,51 @@ impl S3Builder {
format!("https://{}", endpoint)
};

endpoint = endpoint.replace(&format!("//{0}.", bucket), "//");
let url = format!("{0}/{1}", endpoint, bucket);
// Remove bucket name from endpoint.
endpoint = endpoint.replace(&format!("//{bucket}."), "//");
let url = format!("{endpoint}/{bucket}");

debug!("backend detect region with url: {url}");
debug!("detect region with url: {url}");

let req = match http::Request::head(&url).body(AsyncBody::Empty) {
Ok(reg) => reg,
Err(_) => return None,
};
// Try to detect region by endpoint.

let client = match HttpClient::new() {
Ok(client) => client,
Err(_) => return None,
};
let res = match client.send(req).await {
Ok(res) => res,
Err(_) => return None,
};
// If this bucket is R2, we can return auto directly.
//
// Reference: <https://developers.cloudflare.com/r2/api/s3/api/>
if endpoint.ends_with("r2.cloudflarestorage.com") {
return Some("auto".to_string());
}

// If this bucket is AWS, we can try to match the endpoint.
if let Some(v) = endpoint.strip_prefix("https://s3.") {
if let Some(region) = v.strip_suffix(".amazonaws.com") {
return Some(region.to_string());
}
}

// If this bucket is OSS, we can try to match the endpoint.
//
// - `oss-ap-southeast-1.aliyuncs.com` => `oss-ap-southeast-1`
// - `oss-cn-hangzhou-internal.aliyuncs.com` => `oss-cn-hangzhou-internal` (the `.aliyuncs.com` suffix is matched first)
if let Some(v) = endpoint.strip_prefix("https://") {
if let Some(region) = v.strip_suffix(".aliyuncs.com") {
return Some(region.to_string());
}

if let Some(region) = v.strip_suffix("-internal.aliyuncs.com") {
return Some(region.to_string());
}
}

// Try to detect region by HeadBucket.
let req = http::Request::head(&url).body(AsyncBody::Empty).ok()?;

let client = HttpClient::new().ok()?;
let res = client
.send(req)
.await
.map_err(|err| warn!("detect region failed for: {err:?}"))
.ok()?;

debug!(
"auto detect region got response: status {:?}, header: {:?}",
Expand All @@ -566,11 +617,9 @@ impl S3Builder {
);

match res.status() {
// The endpoint works, return with not changed endpoint and
// default region.
StatusCode::OK | StatusCode::FORBIDDEN | StatusCode::MOVED_PERMANENTLY => {
let region = res.headers().get("x-amz-bucket-region").unwrap().to_str();
if let Ok(regin) = region {
let region = res.headers().get("x-amz-bucket-region")?;
if let Ok(regin) = region.to_str() {
Some(regin.to_string())
} else {
None
Expand Down Expand Up @@ -1112,10 +1161,53 @@ mod tests {
assert_eq!(endpoint, "https://test.s3.us-east-2.amazonaws.com");
}
}

#[tokio::test]
async fn test_detect_region() {
let cases = vec![
(
"aws s3 without region in endpoint",
"https://s3.amazonaws.com",
"example",
Some("us-east-1"),
),
(
"aws s3 with region in endpoint",
"https://s3.us-east-1.amazonaws.com",
"example",
Some("us-east-1"),
),
(
"oss with public endpoint",
"https://oss-ap-southeast-1.aliyuncs.com",
"example",
Some("oss-ap-southeast-1"),
),
(
"oss with internal endpoint",
"https://oss-cn-hangzhou-internal.aliyuncs.com",
"example",
Some("oss-cn-hangzhou-internal"),
),
(
"r2",
"https://abc.xxxxx.r2.cloudflarestorage.com",
"example",
Some("auto"),
),
(
"invalid service",
"https://opendal.apache.org",
"example",
None,
),
];

let b = S3Builder::default();
let region = b.detect_region("https://s3.amazonaws.com", "buckets").await;
assert_eq!(region, Some("us-east-1".to_string()))

for (name, endpoint, bucket, expected) in cases {
let region = b.detect_region(endpoint, bucket).await;
assert_eq!(region.as_deref(), expected, "{}", name);
}
}
}