add nextflow d30e48d
This commit is contained in:
89
nextflow/plugins/build.gradle
Normal file
89
nextflow/plugins/build.gradle
Normal file
@@ -0,0 +1,89 @@
|
||||
apply plugin: 'java'

// Publishing settings. Each value is taken from the Gradle project property when it is
// defined, otherwise from the matching environment variable.
ext.aws_access_key_id = project.findProperty('aws_access_key_id') ?: System.getenv('AWS_ACCESS_KEY_ID')
ext.aws_secret_access_key = project.findProperty('aws_secret_access_key') ?: System.getenv('AWS_SECRET_ACCESS_KEY')
// When no repository URL is given, `-SNAPSHOT` versions go to the snapshots repo, anything else to releases
ext.publishRepoUrl = project.findProperty('publish_repo_url') ?: System.getenv('PUBLISH_REPO_URL') ?: ( version.endsWith('-SNAPSHOT') ? "s3://maven.seqera.io/snapshots" : "s3://maven.seqera.io/releases" )

// The jar task of this aggregator project is disabled
jar.enabled = false

subprojects {
    apply plugin: 'java'
    apply plugin: 'groovy'
    apply plugin: 'maven-publish'

    repositories {
        mavenLocal()
        mavenCentral()
    }

    group = 'io.nextflow'
    // each plugin keeps its own version in a `VERSION` file in its project directory
    version = project.file('VERSION').text.trim()

    tasks.withType(GenerateModuleMetadata) {
        enabled = false
    }

    /*
     * Copy the plugin dependencies in the subproject `build/target/libs` directory
     */
    task copyPluginLibs(type: Sync) {
        group 'nextflow'
        from configurations.runtimeClasspath
        into 'build/target/libs'
    }

    /*
     * Copy the plugin manifest to resources directory for dev mode discovery.
     * In dev mode, pf4j looks for META-INF/MANIFEST.MF in the classpath directories,
     * but Gradle only generates it during JAR creation. This task copies the manifest
     * to the resources directory after the JAR is built.
     */
    task copyPluginManifest(type: Copy) {
        group 'nextflow'
        from 'build/tmp/jar/MANIFEST.MF'
        into 'build/resources/main/META-INF'
        dependsOn jar
    }

    // Ensure manifest is available for test classpath (needed for dev mode plugin discovery)
    tasks.matching { it.name == 'test' }.configureEach {
        dependsOn copyPluginManifest
    }

    // Ensure packagePlugin task depends on copyPluginManifest to avoid implicit dependency issues
    tasks.matching { it.name == 'packagePlugin' }.configureEach {
        dependsOn copyPluginManifest
    }

    /*
     * Publish the jars to the Maven repository on S3
     */
    publishing {
        publications {
            maven(MavenPublication) {
                from components.java
                suppressPomMetadataWarningsFor('testFixturesApiElements')
                suppressPomMetadataWarningsFor('testFixturesRuntimeElements')
            }
        }
        repositories {
            maven {
                name = 'Seqera'
                url = publishRepoUrl
                credentials(AwsCredentials) {
                    // keys are resolved at the top of this script, from Gradle project
                    // properties (e.g. `gradle.properties`) or the AWS_* environment variables
                    accessKey aws_access_key_id
                    secretKey aws_secret_access_key
                }
            }
        }
    }
}

/*
 * Make the parent project's `assemble` task depend on this project's `assemble`.
 * Uses the `Task.dependsOn(...)` method rather than appending to the set
 * returned by the `dependsOn` getter.
 */
project.parent.tasks.getByName("assemble").dependsOn(assemble)

/*
 * Copies the dependencies required by the plugins into the corresponding lib directory
 */
classes.dependsOn subprojects.copyPluginLibs
|
||||
3
nextflow/plugins/gradle.properties
Normal file
3
nextflow/plugins/gradle.properties
Normal file
@@ -0,0 +1,3 @@
|
||||
## plugins settings
|
||||
nextflowPluginVersion=1.0.0-beta.14
|
||||
nextflowPluginProvider=nextflow-io
|
||||
89
nextflow/plugins/nf-amazon/README.md
Normal file
89
nextflow/plugins/nf-amazon/README.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# Amazon Web Services plugin for Nextflow
|
||||
|
||||
## Summary
|
||||
|
||||
The Amazon Web Services (AWS) plugin provides support for AWS, including AWS Batch as a compute executor, S3 as a file system, and Fusion file system for high-performance data operations.
|
||||
|
||||
## Get started
|
||||
|
||||
To use this plugin, add it to your `nextflow.config`:
|
||||
|
||||
```groovy
|
||||
plugins {
|
||||
id 'nf-amazon'
|
||||
}
|
||||
```
|
||||
|
||||
Configure your AWS credentials using environment variables, AWS CLI profiles, or IAM roles. Then set up the executor and work directory:
|
||||
|
||||
```groovy
|
||||
process.executor = 'awsbatch'
|
||||
process.queue = '<YOUR BATCH QUEUE>'
|
||||
workDir = 's3://<YOUR BUCKET>/work'
|
||||
|
||||
aws {
|
||||
region = 'us-east-1'
|
||||
batch {
|
||||
cliPath = '/home/ec2-user/miniconda/bin/aws'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic AWS Batch configuration
|
||||
|
||||
```groovy
|
||||
plugins {
|
||||
id 'nf-amazon'
|
||||
}
|
||||
|
||||
process.executor = 'awsbatch'
|
||||
process.queue = 'my-batch-queue'
|
||||
workDir = 's3://my-bucket/work'
|
||||
|
||||
aws {
|
||||
region = 'eu-west-1'
|
||||
batch {
|
||||
cliPath = '/home/ec2-user/miniconda/bin/aws'
|
||||
jobRole = 'arn:aws:iam::123456789012:role/MyBatchJobRole'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Using Fusion file system
|
||||
|
||||
```groovy
|
||||
fusion {
|
||||
enabled = true
|
||||
}
|
||||
|
||||
wave {
|
||||
enabled = true
|
||||
}
|
||||
|
||||
process.executor = 'awsbatch'
|
||||
workDir = 's3://my-bucket/work'
|
||||
```
|
||||
|
||||
### S3 storage options
|
||||
|
||||
```groovy
|
||||
aws {
|
||||
client {
|
||||
maxConnections = 20
|
||||
connectionTimeout = 10000
|
||||
storageEncryption = 'AES256'
|
||||
}
|
||||
region = 'us-east-1'
|
||||
}
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
- [AWS Batch Executor Documentation](https://nextflow.io/docs/latest/aws.html)
|
||||
- [Amazon S3 Storage Documentation](https://nextflow.io/docs/latest/aws.html#s3-storage)
|
||||
|
||||
## License
|
||||
|
||||
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
||||
1
nextflow/plugins/nf-amazon/VERSION
Normal file
1
nextflow/plugins/nf-amazon/VERSION
Normal file
@@ -0,0 +1 @@
|
||||
3.9.0
|
||||
83
nextflow/plugins/nf-amazon/build.gradle
Normal file
83
nextflow/plugins/nf-amazon/build.gradle
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
plugins {
    id 'io.nextflow.nextflow-plugin' version "${nextflowPluginVersion}"
    id 'java-test-fixtures'
}

// Versions shared by several dependency coordinates below. Keeping each one in a
// single place prevents modules of the same library from drifting apart on a bump.
def awsSdkVersion = '2.33.2'
def nettyVersion = '4.1.132.Final'
def groovyVersion = '4.0.31'

nextflowPlugin {
    nextflowVersion = '26.03.4-edge'

    provider = "${nextflowPluginProvider}"
    description = 'Provides comprehensive AWS cloud integration including Batch executor, S3 file system, and Fusion support for high-performance data operations'
    className = 'nextflow.cloud.aws.AmazonPlugin'
    useDefaultDependencies = false
    generateSpec = false
    extensionPoints = [
        'nextflow.cloud.aws.batch.AwsBatchExecutor',
        'nextflow.cloud.aws.config.AwsConfig',
        'nextflow.cloud.aws.fusion.AwsFusionEnv',
        'nextflow.cloud.aws.mail.AwsMailProvider',
        'nextflow.cloud.aws.util.S3PathFactory',
        'nextflow.cloud.aws.util.S3PathSerializer',
    ]
}

// Groovy-only layout: sources live directly under `src/main` and `src/test`
sourceSets {
    main.java.srcDirs = []
    main.groovy.srcDirs = ['src/main']
    main.resources.srcDirs = ['src/resources']
    test.groovy.srcDirs = ['src/test']
    test.java.srcDirs = []
    test.resources.srcDirs = ['src/testResources']
}

configurations {
    // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies
    runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api'
}

dependencies {
    compileOnly project(':nextflow')
    compileOnly 'org.slf4j:slf4j-api:2.0.17'
    compileOnly 'org.pf4j:pf4j:3.14.1'

    api ('javax.xml.bind:jaxb-api:2.4.0-b180830.0359')
    api ("software.amazon.awssdk:s3:${awsSdkVersion}")
    api ("software.amazon.awssdk:ec2:${awsSdkVersion}")
    api ("software.amazon.awssdk:batch:${awsSdkVersion}")
    api ("software.amazon.awssdk:iam:${awsSdkVersion}")
    api ("software.amazon.awssdk:ecs:${awsSdkVersion}")
    api ("software.amazon.awssdk:cloudwatchlogs:${awsSdkVersion}")
    api ("software.amazon.awssdk:codecommit:${awsSdkVersion}")
    api ("software.amazon.awssdk:sts:${awsSdkVersion}")
    api ("software.amazon.awssdk:ses:${awsSdkVersion}")
    api ("software.amazon.awssdk:sso:${awsSdkVersion}")
    api ("software.amazon.awssdk:ssooidc:${awsSdkVersion}")
    api ("software.amazon.awssdk:s3-transfer-manager:${awsSdkVersion}")
    api ("software.amazon.awssdk:apache-client:${awsSdkVersion}")
    api ("software.amazon.awssdk:aws-crt-client:${awsSdkVersion}")

    // address security vulnerabilities
    implementation "io.netty:netty-common:${nettyVersion}"
    implementation "io.netty:netty-handler:${nettyVersion}"
    implementation "io.netty:netty-codec-http2:${nettyVersion}"

    testImplementation(testFixtures(project(":nextflow")))
    testImplementation project(':nextflow')
    testImplementation "org.apache.groovy:groovy:${groovyVersion}"
    testImplementation "org.apache.groovy:groovy-nio:${groovyVersion}"
}
|
||||
376
nextflow/plugins/nf-amazon/changelog.txt
Normal file
376
nextflow/plugins/nf-amazon/changelog.txt
Normal file
@@ -0,0 +1,376 @@
|
||||
nf-amazon changelog
|
||||
===================
|
||||
3.9.0 - 25 Apr 2026
|
||||
- Add hints process directive for executor-specific scheduling hints (#7034) [406358e03]
|
||||
|
||||
3.8.3 - 20 Apr 2026
|
||||
- Fix inconsistent indentation in nf-amazon (#7047) [df6855d7d]
|
||||
- Fix S3FileSystemProvider.newInputStream() draining full object on close (#7046) [cf3867604]
|
||||
- Apply socket timeout to S3 CRT connections (#7024) [6f4a21764]
|
||||
- Manage AWS SDK exceptions to convert to the appropriate IO exceptions (#6707) [39c755663]
|
||||
|
||||
3.8.2 - 7 Apr 2026
|
||||
- Bump org.apache.groovy from 4.0.30 to 4.0.31 (#6985) [62a391588]
|
||||
- Bump org.pf4j:pf4j from 3.12.0 to 3.14.1 (#6983) [95aba07a3]
|
||||
- Bump io.netty:netty-codec-http2 from 4.1.129.Final to 4.1.132.Final (#6981) [d12cdc61e]
|
||||
|
||||
3.8.1 - 26 Mar 2026
|
||||
- Fix AWS Batch machine type trace for new instance families (#6952) [06e78ba0b]
|
||||
- Fix download of empty files in old minio based S3 transfers (#6944) [ccded1845]
|
||||
- Fix jackson-databind dependency in nf-amazon (#6941) [321c57f88]
|
||||
- Fix security vulnerabilities (#6938) [8b1ab40c4]
|
||||
|
||||
3.8.0 - 17 Mar 2026
|
||||
- Add multi-arch support to arch process directive (#6897) [c7ca36902]
|
||||
|
||||
3.7.1 - 28 Feb 2026
|
||||
- Fix S3 lookup unbounded pagination with double call (#6851) [a2e67eb99]
|
||||
- Fix S3 delete throwing DirectoryNotEmptyException due to eventual consistency (#6833) [f3ac49754]
|
||||
|
||||
3.7.0 - 8 Feb 2026
|
||||
- Fix isCompleted check in getNumSpotInterruptions (#6805) [76558481a]
|
||||
- Fix AWS Batch spot instance detection (#6722) [29356f60e]
|
||||
- Fix error when checking whether an S3 bucket exists (#6706) [c1bd552ab]
|
||||
- Remove isCompleted() from getNumSpotInterruptions (#6729) [24cc59e27]
|
||||
- Add aws.batch.forceGlacierTransfer config option (#6700) [e3bf3153b]
|
||||
|
||||
3.6.0 - 19 Dec 2025
|
||||
- Add spot interruption tracking to trace records (#6606) [eecd81671]
|
||||
|
||||
3.5.0 - 28 Nov 2025
|
||||
- Optimize exit code handling by relying on scheduler status for successful executions (#6484) [454a2ae85]
|
||||
|
||||
3.4.2 - 28 Nov 2025
|
||||
- Fix incorrect AWS region when specifying a S3 regional endpoint (#6530) [770bdd3eb]
|
||||
- Fix unordered completed parts in AWS multipart upload (#6560) [89eb70130]
|
||||
|
||||
3.4.1 - 22 Oct 2025
|
||||
- Fix no secrets in AWS Batch jobs (#6499) [c76c32582]
|
||||
- Simplify S3 configuration options (#6496) [0b7f18049]
|
||||
|
||||
3.4.0 - 21 Oct 2025
|
||||
- Limit S3 client connections when using virtual threads (#6369) [295c60b5c]
|
||||
- Rename `config.schema` package to `config.spec` (#6485) [ef0d2d601]
|
||||
|
||||
3.3.0 - 8 Oct 2025
|
||||
- Fix SIGTERM forwarding in AWS Batch jobs (#6414) [abbef79b7]
|
||||
- Limit S3 concurrent downloads to fix Java Heap OOM (#6402) [fd71d0e8c]
|
||||
|
||||
3.1.0 - 15 Aug 2025
|
||||
- Fix AWS transfer manager anonymous fallback (#6296) [ci fast] [ed5c99e1]
|
||||
- Fix silent failure when downloading a directory with AWS SDK v2 (#6266) [ci fast] [2d76d8f0]
|
||||
- Unify nf-lang config scopes with runtime classes (#6271) [bfa67ca3]
|
||||
- Update Azure and AWS deps (#6343) [ci fast] [ff00e2de]
|
||||
- Bump groovy 4.0.28 (#6304) [ci fast] [a468f8ef]
|
||||
- Bump netty-codec-http2:4.1.124.Final [7e690b44]
|
||||
|
||||
3.0.0 - 6 Jul 2025
|
||||
- Fix AWS nio tests [ci fast] [069653dd]
|
||||
- Fix replace List.getFirst with List.get(0) for compatibility [83428ee2]
|
||||
- Upgrade to AWS Java SDK v2 (#6165) [fc99b447]
|
||||
- Bump Slf4j version 2.0.17 [93199e09]
|
||||
|
||||
2.15.0 - 8 May 2025
|
||||
- Add verbose AWS Batch job cleanup logging [504bd2df]
|
||||
- Remove test constructors or mark as TestOnly (#5216) [d4fadd42]
|
||||
|
||||
2.14.0 - 23 Apr 2025
|
||||
- Add support for data and execution lineage (#5715) [20e06da7]
|
||||
- Manage AWS Batch Unscheduled jobs (#5936) [44abe60c]
|
||||
- Workflow outputs (third preview) (#5909) [2e2dea42]
|
||||
- Add support for Fusion Snapshots (#5954) [d7f047f8]
|
||||
|
||||
2.13.0 - 17 Mar 2025
|
||||
- Fix Consider AWS China as custom endpoint (#5840) [232ce9d1]
|
||||
- Fix Prevent S3 global option when using custom endpoints (#5779) [ed9da469]
|
||||
- Bump groovy 4.0.26 [f740bc56]
|
||||
|
||||
2.12.0 - 12 Feb 2025
|
||||
- Fix bugs with workflow outputs (#5502) [ab59d30f]
|
||||
- Fail the run if publish thread pool times out (#5578) [5325e5a6]
|
||||
- Bump groovy 4.0.25 [19c40a4a]
|
||||
- Bump io.netty:netty-handler:4.1.118.Final [db4a9037]
|
||||
|
||||
2.11.0 - 20 Jan 2025
|
||||
- Disable AWS SDK v1 warning [ci fast] [cd00a26d]
|
||||
- Ensure job is killed when exception in task status check (#5561) [9eefd207]
|
||||
- Bump logback 1.5.13 + slf4j 2.0.16 [cc0163ac]
|
||||
- Bump groovy 4.0.24 missing deps [40670f7e]
|
||||
|
||||
2.10.0 - 3 Dec 2024
|
||||
- Detecting errors in data unstaging (#5345) [3c8e602d]
|
||||
- Prevent NPE with null AWS Batch response [12fc1d60]
|
||||
- Fix Fargate warning on memory check (#5475) [bdf0ad00]
|
||||
- Bump groovy 4.0.24 [dd71ad31]
|
||||
- Bump aws sdk 1.12.777 (#5458) [8bad0b4b]
|
||||
- Bump netty-common to version 4.1.115.Final [d1bbd3d0]
|
||||
|
||||
2.9.0 - 2 Oct 2024
|
||||
- Add Platform workflow prefix in AWS Batch job names (#5318) [e2e test] [42dd4ba8]
|
||||
- Fix AWS spot attempts with zero value (#5331) [ci fast] [bac2da12]
|
||||
- Bump groovy 4.0.23 (#5303) [ci fast] [fe3e3ac7]
|
||||
|
||||
2.8.0 - 4 Sep 2024
|
||||
- Disable AWS spot retry (#5215) [f28fcb25]
|
||||
|
||||
2.7.0 - 5 Aug 2024
|
||||
- More robust parsing of shm-size containerOptions (#5177) [b56802a3]
|
||||
- Fix AWS Cloudwatch access when using custom log group name [30195838]
|
||||
- Fix Prevent AWS Batch retry the job execution when the container does not exist [4e218f22]
|
||||
- Fix Invalid AWS Fargate CPUs usage error reporting [d9c50e59]
|
||||
- Bump amazon sdk to version 1.12.766 [cc6ec314]
|
||||
- Bump pf4j to version 3.12.0 [96117b9a]
|
||||
|
||||
2.6.0 - 17 Jun 2024
|
||||
- Allow requester pays for S3 buckets (#5027) [0070c1b0]
|
||||
- Fix support for s5cmd 2.2.2 (#5069) [7e78bd4d]
|
||||
- Bump aws-sdk 1.12.740 [acad2a1f]
|
||||
|
||||
2.5.3 - 1 Aug 2024
|
||||
- More robust parsing of shm-size containerOptions (#5177) [98cf0068]
|
||||
- Bump amazon sdk to version 1.12.766 [5ce42b79]
|
||||
- Bump pf4j to version 3.12.0 [1a8f086a]
|
||||
|
||||
2.5.2 - 20 May 2024
|
||||
- Fix nf-amazon plugin dependency [c234b09f]
|
||||
|
||||
2.5.1 - 14 May 2024
|
||||
- Use protected visibility for updateStatus method [6871ba06]
|
||||
|
||||
2.5.0 - 13 May 2024
|
||||
- Add support for Job arrays (#3892) [ca9bc9d4]
|
||||
- Fix Use fully qualified S3 uris in error message (#4923) [f1cffd1b]
|
||||
|
||||
2.4.2 - 15 Apr 2024
|
||||
- Improve retry logic for AWS Batch executor [62926c28]
|
||||
- Bump groovy 4.0.21 [9e08390b]
|
||||
|
||||
2.4.1 - 10 Mar 2024
|
||||
- Fix Error while publishing S3 file with blanks [b74c0227]
|
||||
- Update copyright info [e3089f0e]
|
||||
- Bump groovy 4.0.19 [854dc1f0]
|
||||
|
||||
2.4.0 - 5 Feb 2024
|
||||
- Remove Glacier auto-retrieval (#4705) [5f0ec50d]
|
||||
- Bump nextflow 23.12.0-edge as min version [63e83702]
|
||||
- Bump Groovy 4 (#4443) [9d32503b]
|
||||
|
||||
2.3.0 - 20 Dec 2023
|
||||
- Add AWS_SESSION_TOKEN to Fusion environment (#4581) [552f29b0]
|
||||
- Add experimental support for Fargate compute type for AWS Batch (#3474) [47cf335b]
|
||||
|
||||
2.2.0 - 24 Nov 2023
|
||||
- Add support for FUSION_AWS_REGION (#4481) [8f8b09fa]
|
||||
- Fix security vulnerabilities (#4513) [a310c777]
|
||||
- Fix typos (#4519) [ci fast] [6b1ea726]
|
||||
- Fix Fusion symlinks when publishing files (#4348) [89f09fe0]
|
||||
- Bump javax.xml.bind:jaxb-api:2.4.0-b180830.0359
|
||||
|
||||
2.1.4-patch3 - 30 Jul 2024
|
||||
- Bump amazon sdk to version 1.12.766 [189f58ed]
|
||||
- Bump pf4j to version 3.12.0 [8dfa4076]
|
||||
|
||||
2.1.4-patch2 - 11 Jun 2024
|
||||
- Fix security vulnerabilities (#5057) [6d8765b8]
|
||||
|
||||
2.1.4-patch1 - 28 May 2024
|
||||
- Bump dependency with Nextflow 23.10.2
|
||||
|
||||
2.1.4 - 10 Oct 2023
|
||||
- Improve S3 endpoint validation [2b9ae6aa]
|
||||
- Add -cloudcache CLI option (#4385) [73fda582]
|
||||
|
||||
2.1.3 - 28 Sep 2023
|
||||
- Fix minor typos in changelogs/source code (#4319) [4ce9f1df]
|
||||
- Fix List of S3 bucket for custom endpoint [4327fa58]
|
||||
- Fix support for S3 custom endpoint with anonymous access [03752815]
|
||||
- Fix Prevent multi attempts to retrieve AWS creds [b30efe36]
|
||||
|
||||
2.1.2 - 10 Sep 2023
|
||||
- Disable staging script for remote work dir (#4282) [80f7cd46]
|
||||
- Allow setting shell directive when using the trace file. (#4210) [7d6ad624]
|
||||
- Bump groovy 3.0.19 [cb411208]
|
||||
|
||||
2.1.1 - 5 Aug 2023
|
||||
- Fix glob resolution for remote files [19a72c40]
|
||||
- Fix Option fixOwnership traverse parent directories [f2a2ea35]
|
||||
|
||||
2.1.0 - 22 Jul 2023
|
||||
- Add support for AWS SSO credentials provider (#4045) [53e33cde]
|
||||
- Wait for all child processes in nxf_parallel (#4050) [60a5f1a7]
|
||||
- Ignore accelerator type for AWS Batch (#4043) [263ecca8]
|
||||
- Bump Groovy 3.0.18 [207eb535]
|
||||
|
||||
2.0.1 - 14 Jun 2023
|
||||
- Add support for AWS SES as mail sending provider [df85d443]
|
||||
- Fix access to public S3 bucket when no creds are provided (#3992) [cf8ba466]
|
||||
- Fix S3 path normalization [b75ec444]
|
||||
|
||||
2.0.0 - 15 May 2023
|
||||
- Add fusion.exportStorageCredentials option [ci fast] [acb6aedf]
|
||||
- Fix AWS SSE env propagation to Fusion [e24608c3]
|
||||
- Fix string comparison in S3 client (#3875) [ci fast] [9344d294]
|
||||
- Preview support for virtual threads (#3871) [5c429046]
|
||||
- Refactor the AWS configuration (#3498) [a74e42d9]
|
||||
- Rename AmazonS3Client to S3Client [cc59596a]
|
||||
- Rename com.upplication.s3fs package to nextflow.cloud.aws.nio [a2f3bb24]
|
||||
- Security fixes [973b7bea]
|
||||
- Update logging libraries [d7eae86e]
|
||||
- Bump groovy 3.0.17 [cfe4ba56]
|
||||
|
||||
1.16.2 - 15 Apr 2023
|
||||
- Update plugin deps [83e8fd6a]
|
||||
|
||||
1.16.1 - 1 Apr 2023
|
||||
- Fix NoSuchMethodError String.stripIndent with Java 11 [308eafe6]
|
||||
|
||||
1.16.0 - 19 Mar 2023
|
||||
- Remove unused classes [9fa8d75b]
|
||||
- Add support for AWS S3 Glacier Retrieval Tier (#3680) [fab6bd5e]
|
||||
- Bump groovy 3.0.16 [d3ff5dcb]
|
||||
|
||||
1.15.0 - 21 Feb 2023
|
||||
- Improve AWS batch error logging [8f4884c1]
|
||||
- Remove deprecated buckets field [0a355ac3]
|
||||
- Add support for fusion tags (#3609) [ci fast] [8385ec22]
|
||||
- Add nextflow tags to AWS Batch job def [b465ac52]
|
||||
- Use Fusion as launcher (#3584) [34a27733]
|
||||
- Bump FUSION_ prefix variables [ci fast] [a7282d64]
|
||||
- Fix serialization of S3 paths with spaces (#3565) [ce487624]
|
||||
- Bump groovy 3.0.15 [7a3ebc7d]
|
||||
|
||||
1.14.0 - 14 Jan 2023
|
||||
- Add `schedulingPriority` option to AWS Batch (use with `shareIdentifier`) (#3505) [06960bb2]
|
||||
- Fix FilePorter concurrency issue (#3511) [11ccfa26]
|
||||
- Fix support for AWS ACL for Batch #2671 [a9644919]
|
||||
- Improve container native executor configuration [03126371]
|
||||
- Improve AWS batch exit code reporting [d1bb2fe2]
|
||||
- Refactor Fusion config [902e5b34]
|
||||
- Refactor Fusion package [52f4c5d5]
|
||||
- Remove unnecessary Fusion env var [dfa47556]
|
||||
- Bump groovy 3.0.14 [7c204236]
|
||||
|
||||
1.13.0 - 13 Dec 2022
|
||||
- Add support for AWS Glacier restore [b6110766]
|
||||
- Add support for S3 storageClass to publishDir [066f9203]
|
||||
- Fix math overflow when copying large AWS S3 files [f32ea0ba]
|
||||
- Bump AWS sdk version 1.12.351 [4dd82b66]
|
||||
- Rewrite fetchIamRole and fetchRegion to use AWS SDK (#3425) [ci skip] [e350f319]
|
||||
- Bump nf-amazon@1.13.0 [ccaab713]
|
||||
|
||||
1.12.0 - 21 Nov 2022
|
||||
- Improve S3 thread pool config [41021cbc]
|
||||
|
||||
1.11.0 - 3 Oct 2022
|
||||
- Add support for custom S3 content type [02afa332] <Paolo Di Tommaso>
|
||||
- Get rid of file name rolling for report files [a762ed59] <Paolo Di Tommaso>
|
||||
|
||||
1.10.7 - 28 Sep 2022
|
||||
- Fix Issue copying file bigger than 5gb to S3 [18fd9a44]
|
||||
|
||||
1.10.6 - 26 Sep 2022
|
||||
- Add tags propagation to AWS Batch [d64eeffc]
|
||||
|
||||
1.10.5 - 20 Sep 2022
|
||||
- Fix AWS S3 copy object [b3b90d23]
|
||||
|
||||
1.10.4 - 13 Sep 2022
|
||||
- Add STS library to enable use of IRSA in EKS cluster [62df42c3]
|
||||
|
||||
1.10.3 - 10 Sep 2022
|
||||
- S3 min upload size 5MB [9926d15d]
|
||||
- Use smaller buffer size for s3 stream uploader [8c643074]
|
||||
|
||||
1.10.2 - 7 Sep 2022
|
||||
- Fix thread pool race condition on shutdown [8d2b0587]
|
||||
- Fix Intermediate multipart upload requires a minimum size (#3193) [0b66aed6]
|
||||
|
||||
1.10.1 - 1 Sep 2022
|
||||
- Add support for label/tags (#2853) [5d0b7c35]
|
||||
- Add fusion support to local executor [17160bb0]
|
||||
- Improve fusion env handling [10f35b60]
|
||||
- Get rid of remote bin dir [6cfb51e7]
|
||||
|
||||
1.10.0 - 11 Aug 2022
|
||||
- Improve S3 copy via xfer manager [02d2beae]
|
||||
- Add experimental fusion support [1854f1f2]
|
||||
- Increase S3 upload chunk size to 100 MB [9c94a080]
|
||||
- Add support for AWS Batch logs group (#3092) [4ef043ac]
|
||||
- Fix queueSize setting is not honoured by AWS Batch executor (#3093) [d07bb52b]
|
||||
- Add share identifier to Aws Batch (#3089) [c0253aba]
|
||||
|
||||
1.9.0 - 1 Aug 2022
|
||||
- Fix Unable to disable scratch attribute with AWS Batch [1770f73a]
|
||||
- Fix NPE while setting S3 ObjectMetadata #3031 [d6163431] <Jorge Aguilera>
|
||||
- Fix Unable to retrieve AWS batch instance type #1658 [3c4d4d3b] <Paolo Di Tommaso>
|
||||
- Fix AWS Batch job definition conflict (#3048) [e5084418] <Paolo Di Tommaso>
|
||||
- Improve S3 file upload/download via Transfer manager [7e8d2a5a] [b7bf9fe5] <Jorge Aguilera>
|
||||
|
||||
1.8.1 - 13 Jul 2022
|
||||
- Fix Exception when setting AWS Batch containerOptions #3019 [89312ad8] <Paolo Di Tommaso>
|
||||
- Add docs aws.client.s3PathStyleAccess config (#3000) [20005500] <Abhinav Sharma>
|
||||
|
||||
1.8.0 - 25 May 2022
|
||||
- Add support for custom KMS keys
|
||||
- Add support for virtual file system move operation [8c0ddfd5]
|
||||
|
||||
1.7.2 - 15 Apr 2022
|
||||
- Fix Aws Batch retry policy on spot reclaim [d855f0d9]
|
||||
|
||||
1.7.1 - 23 Apr 2022
|
||||
- Add config option `aws.client.anonymous` to allow the access of public buckets
|
||||
- Add config option `aws.client.debug`
|
||||
- Fix S3 storage encryption flag for Batch submit job
|
||||
- Change upload default chunk size to 20MB
|
||||
|
||||
1.7.0 - 6 Apr 2022
|
||||
- Add native handling for spot instance interruptions
|
||||
- Add config option `aws.batch.maxSpotAttempts`
|
||||
- Add fetching container reason attribute on Batch job failure
|
||||
|
||||
1.6.0 - 27 Mar 2022
|
||||
- Handle AWS Spot interruption with automatic job retry
|
||||
- Refactor AWS Batch job unique token generation to prevent
|
||||
"vCPUs and Memory Values Not Overridden" error see
|
||||
https://github.com/nextflow-io/nextflow/issues/2561
|
||||
|
||||
1.5.2 - 25 Feb 2022
|
||||
- Prevent S3 tagging with empty list
|
||||
|
||||
1.5.1 - 19 Feb 2022
|
||||
- Fix Batch Job name exceed max allowed size
|
||||
|
||||
1.5.0 - 21 Jan 2022
|
||||
- Add support for secrets management to AWS Batch executor
|
||||
- Enable parallel s3 downloads by default
|
||||
|
||||
1.4.0 - 20 Dec 2021
|
||||
- Move s3fs client implementation into nf-amazon module
|
||||
- Bump AWS sdk 1.12.129
|
||||
|
||||
1.3.4 - 29 Nov 2021
|
||||
- Bump s3fs version 1.1.3
|
||||
- Fix S3 file exists check on provider not supporting Get object ACL
|
||||
|
||||
1.3.3 - 22 Nov 2021
|
||||
- Fix Missing AWS SSE encryption for begin and exit task files #2452
|
||||
|
||||
1.3.2 - 18 Nov 2021
|
||||
- Bump s3fs version 1.1.2 (fix issue propagating KMS key while copying S3 file across encrypted buckets)
|
||||
|
||||
1.3.1 - 11 Nov 2021
|
||||
- Use ResourceRequirement instead of deprecated APIs for cpus and mem requirement
|
||||
|
||||
1.3.0 - 28 Oct 2021
|
||||
- Improve failed task error reporting fetching logs from CloudWatch
|
||||
|
||||
1.2.2 - 19 Oct 2021
|
||||
- Fix issue evaluating dynamic errorStrategy [8c6a5a6] [ce4d491]
|
||||
|
||||
1.2.1 - 12 Oct 2021
|
||||
- Add retry on AWS Job definition not-found error [452cae5d8]
|
||||
|
||||
1.2.0 - 6 Aug 2021
|
||||
- Use AWS cli standard retry mode by default [f2f1fdea4]
|
||||
- Fix orphaned AWS Batch jobs after pipeline interruption #2169 [65e2a4d05]
|
||||
- Strengthen AWS Batch task handler [eb1aff275]
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws
|
||||
|
||||
import nextflow.cloud.aws.nio.S3FileSystemProvider
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.file.FileHelper
|
||||
import nextflow.plugin.BasePlugin
|
||||
import org.pf4j.PluginWrapper
|
||||
/**
 * Plugin entry point for the Nextflow Amazon extensions.
 * <p>
 * When the plugin is started it gets, or installs, the {@link S3FileSystemProvider}
 * through {@link FileHelper}.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
class AmazonPlugin extends BasePlugin {

    /**
     * @param wrapper the pf4j plugin wrapper, handed over to the {@link BasePlugin} constructor
     */
    AmazonPlugin(PluginWrapper wrapper) {
        super(wrapper)
    }

    /**
     * Run the base plugin start-up, then get or install the S3 file system provider.
     */
    @Override
    void start() {
        super.start()
        final Class<S3FileSystemProvider> s3ProviderType = S3FileSystemProvider
        FileHelper.getOrInstallProvider(s3ProviderType)
    }

}
|
||||
@@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws
|
||||
|
||||
import nextflow.cloud.aws.nio.util.S3AsyncClientConfiguration
|
||||
import nextflow.cloud.aws.nio.util.S3SyncClientConfiguration
|
||||
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
|
||||
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider
|
||||
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider
|
||||
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials
|
||||
import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider
|
||||
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
|
||||
import software.amazon.awssdk.core.exception.SdkClientException
|
||||
import software.amazon.awssdk.http.SdkHttpClient
|
||||
import software.amazon.awssdk.regions.Region
|
||||
import software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient
|
||||
import software.amazon.awssdk.services.ec2.Ec2Client
|
||||
import software.amazon.awssdk.services.ecs.EcsClient
|
||||
import software.amazon.awssdk.services.s3.S3AsyncClient
|
||||
import software.amazon.awssdk.services.s3.S3Client
|
||||
import software.amazon.awssdk.services.s3.S3Configuration
|
||||
import software.amazon.awssdk.services.s3.S3CrtAsyncClientBuilder
|
||||
import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration
|
||||
import software.amazon.awssdk.services.sts.StsClient
|
||||
import software.amazon.awssdk.services.sts.model.GetCallerIdentityRequest
|
||||
import software.amazon.awssdk.services.sts.model.StsException
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.transform.Memoized
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.exception.AbortOperationException
|
||||
/**
|
||||
* Implement a factory class for AWS client objects
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@Slf4j
|
||||
@CompileStatic
|
||||
class AwsClientFactory {
|
||||
|
||||
private AwsConfig config
|
||||
|
||||
/**
|
||||
* The AWS access key credentials (optional)
|
||||
*/
|
||||
private String accessKey
|
||||
|
||||
/**
|
||||
* The AWS secret key credentials (optional)
|
||||
*/
|
||||
private String secretKey
|
||||
|
||||
/**
|
||||
* The AWS region eg. {@code eu-west-1}. If it's not specified the current region is retrieved from
|
||||
* the EC2 instance metadata
|
||||
*/
|
||||
private String region
|
||||
|
||||
private String profile
|
||||
|
||||
/**
 * Create a factory backed by an empty AWS configuration.
 */
AwsClientFactory() {
    this(new AwsConfig(Collections.emptyMap()))
}

/**
 * Create a factory for the given AWS configuration.
 *
 * @param config
 *      The AWS configuration providing the credentials, profile and region settings
 * @param region
 *      An optional region taking precedence over the configured one
 * @throws AbortOperationException
 *      When no region can be determined from any of the sources consulted
 */
AwsClientFactory(AwsConfig config, String region=null) {
    this.config = config

    // -- static credentials are only taken when both the access and the secret key are given
    final boolean hasKeyPair = config.accessKey && config.secretKey
    if( hasKeyPair ) {
        this.accessKey = config.accessKey
        this.secretKey = config.secretKey
    }

    // -- the required profile, if any: config first, then `AWS_PROFILE`, then `AWS_DEFAULT_PROFILE`
    this.profile = config.profile ?: SysEnv.get('AWS_PROFILE') ?: SysEnv.get('AWS_DEFAULT_PROFILE')

    // -- the region: explicit argument, config, `AWS_REGION`, `AWS_DEFAULT_REGION` and,
    //    only when all of those are missing, the value returned by `fetchRegion()`
    this.region = region ?: config.region ?: SysEnv.get('AWS_REGION') ?: SysEnv.get('AWS_DEFAULT_REGION') ?: fetchRegion()

    if( !this.region ) {
        throw new AbortOperationException('Missing AWS region -- Make sure to define in your system environment the variable `AWS_DEFAULT_REGION`')
    }
}
|
||||
|
||||
String accessKey() { accessKey }
|
||||
|
||||
String secretKey() { secretKey }
|
||||
|
||||
String region() { region }
|
||||
|
||||
String profile() { profile }
|
||||
|
||||
/**
|
||||
* Retrieve the current IAM role eventually define for a EC2 instance.
|
||||
* See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
|
||||
*
|
||||
* @return
|
||||
* The IAM role name associated to this instance or {@code null} if no role is defined or
|
||||
* it's not a EC2 instance
|
||||
*/
|
||||
protected String fetchIamRole() {
|
||||
try {
|
||||
final stsClient = StsClient.create()
|
||||
return stsClient.getCallerIdentity(GetCallerIdentityRequest.builder().build() as GetCallerIdentityRequest).arn();
|
||||
}
|
||||
catch (StsException e) {
|
||||
log.trace "Unable to fetch IAM credentials -- Cause: ${e.message}"
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the AWS region from the EC2 instance metadata.
|
||||
* See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
|
||||
*
|
||||
* @return
|
||||
* The AWS region of the current EC2 instance eg. {@code eu-west-1} or
|
||||
* {@code null} if it's not an EC2 instance.
|
||||
*/
|
||||
private String fetchRegion() {
|
||||
try {
|
||||
return new InstanceProfileRegionProvider().getRegion().id();
|
||||
}
|
||||
catch (SdkClientException e) {
|
||||
log.debug("Cannot fetch AWS region", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to map a region string to a {@link Region} object.
|
||||
*
|
||||
* @param region An AWS region string identifier eg. {@code eu-west-1}
|
||||
* @return A {@link Region} corresponding to the specified region string
|
||||
*/
|
||||
private Region getRegionObj(String region) {
|
||||
final result = Region.of(region)
|
||||
if( !result )
|
||||
throw new IllegalArgumentException("Not a valid AWS region name: $region");
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets or lazily creates an {@link Ec2Client} instance given the current
|
||||
* configuration parameter
|
||||
*
|
||||
* @return
|
||||
* An {@link Ec2Client} instance
|
||||
*/
|
||||
synchronized Ec2Client getEc2Client() {
|
||||
return Ec2Client.builder()
|
||||
.region(getRegionObj(region))
|
||||
.credentialsProvider(getCredentialsProvider0())
|
||||
.build()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets or lazily creates an {@link BatchClient} instance given the current
|
||||
* configuration parameter
|
||||
*
|
||||
* @return
|
||||
* An {@link BatchClient} instance
|
||||
*/
|
||||
@Memoized
|
||||
BatchClient getBatchClient() {
|
||||
return BatchClient.builder()
|
||||
.region(getRegionObj(region))
|
||||
.credentialsProvider(getCredentialsProvider0())
|
||||
.build()
|
||||
}
|
||||
|
||||
@Memoized
|
||||
EcsClient getEcsClient() {
|
||||
return EcsClient.builder()
|
||||
.region(getRegionObj(region))
|
||||
.credentialsProvider(getCredentialsProvider0())
|
||||
.build()
|
||||
}
|
||||
|
||||
@Memoized
|
||||
CloudWatchLogsClient getLogsClient() {
|
||||
return CloudWatchLogsClient.builder().region(getRegionObj(region)).credentialsProvider(getCredentialsProvider0()).build()
|
||||
}
|
||||
|
||||
S3Client getS3Client(S3SyncClientConfiguration s3ClientConfig, boolean global = false) {
|
||||
final SdkHttpClient.Builder httpClientBuilder = s3ClientConfig.getHttpClientBuilder()
|
||||
final ClientOverrideConfiguration overrideConfiguration = s3ClientConfig.getClientOverrideConfiguration()
|
||||
final builder = S3Client.builder()
|
||||
.crossRegionAccessEnabled(global)
|
||||
.credentialsProvider(getS3CredentialsProvider())
|
||||
.serviceConfiguration(S3Configuration.builder()
|
||||
.pathStyleAccessEnabled(config.s3Config.pathStyleAccess)
|
||||
.multiRegionEnabled(global)
|
||||
.build())
|
||||
|
||||
if( config.s3Config.endpoint )
|
||||
builder.endpointOverride(URI.create(config.s3Config.endpoint))
|
||||
|
||||
// AWS SDK v2 region must be always set, even when endpoint is overridden
|
||||
builder.region(getRegionObj(region))
|
||||
|
||||
if( httpClientBuilder != null )
|
||||
builder.httpClientBuilder(httpClientBuilder)
|
||||
|
||||
if( overrideConfiguration != null )
|
||||
builder.overrideConfiguration(overrideConfiguration)
|
||||
|
||||
return builder.build()
|
||||
}
|
||||
|
||||
S3AsyncClient getS3AsyncClient(S3AsyncClientConfiguration s3ClientConfig, boolean global = false) {
|
||||
def builder = S3AsyncClient.crtBuilder()
|
||||
.crossRegionAccessEnabled(global)
|
||||
.credentialsProvider(getS3CredentialsProvider())
|
||||
.forcePathStyle(config.s3Config.pathStyleAccess)
|
||||
.region(getRegionObj(region))
|
||||
if( config.s3Config.endpoint )
|
||||
builder.endpointOverride(URI.create(config.s3Config.endpoint))
|
||||
|
||||
final retryConfiguration = s3ClientConfig.getCrtRetryConfiguration()
|
||||
if( retryConfiguration != null )
|
||||
builder.retryConfiguration(retryConfiguration)
|
||||
|
||||
final httpConfiguration = s3ClientConfig.getCrtHttpConfiguration()
|
||||
if( httpConfiguration != null )
|
||||
builder.httpConfiguration(httpConfiguration)
|
||||
|
||||
final multipartConfig = s3ClientConfig.getMultipartConfiguration()
|
||||
if( multipartConfig != null )
|
||||
setMultipartConfiguration(multipartConfig, builder)
|
||||
|
||||
final throughput = s3ClientConfig.getTargetThroughputInGbps()
|
||||
if( throughput != null )
|
||||
builder.targetThroughputInGbps(throughput)
|
||||
|
||||
final nativeMemory = s3ClientConfig.getMaxNativeMemoryInBytes()
|
||||
if (nativeMemory != null )
|
||||
builder.maxNativeMemoryLimitInBytes(nativeMemory)
|
||||
|
||||
final maxConcurrency = s3ClientConfig.getMaxConcurrency()
|
||||
if( maxConcurrency != null )
|
||||
builder.maxConcurrency(maxConcurrency)
|
||||
|
||||
return builder.build()
|
||||
}
|
||||
/**
|
||||
* Returns an AwsCredentialsProvider for S3 clients.
|
||||
*
|
||||
* This method wraps the same AWS credentials used for other clients, but ensures proper handling of anonymous S3 access.
|
||||
* If the 'anonymous' flag is set in Nextflow's AWS S3 configuration, or if no credentials are resolved by other providers,
|
||||
* an AnonymousCredentialsProvider instance is returned.
|
||||
*
|
||||
* Prior to AWS SDK v2, the S3CredentialsProvider automatically managed fallback to anonymous access when no credentials were found.
|
||||
* However, due to a limitation in the AWS SDK v2 CRT Async S3 client (see https://github.com/aws/aws-sdk-java-v2/issues/5810),
|
||||
* anonymous credentials only work when explicitly configured via AnonymousCredentialsProvider.
|
||||
* Custom credential providers or provider chains that resolve to anonymous credentials are not handled correctly by the CRT client.
|
||||
*
|
||||
* To work around this, this method explicitly checks whether credentials can be resolved.
|
||||
* If no credentials are found, it returns an AnonymousCredentialsProvider; otherwise, it returns the resolved provider.
|
||||
*
|
||||
* @return an AwsCredentialsProvider instance, falling back to anonymous if needed.
|
||||
*/
|
||||
private AwsCredentialsProvider getS3CredentialsProvider() {
|
||||
if ( config.s3Config.anonymous )
|
||||
return AnonymousCredentialsProvider.create()
|
||||
def provider = getCredentialsProvider0()
|
||||
try {
|
||||
provider.resolveCredentials()
|
||||
} catch (Exception e) {
|
||||
log.debug("No AWS credentials available - falling back to anonymous access")
|
||||
return AnonymousCredentialsProvider.create()
|
||||
}
|
||||
return provider
|
||||
}
|
||||
|
||||
private void setMultipartConfiguration(MultipartConfiguration multipartConfig, S3CrtAsyncClientBuilder builder) {
|
||||
if( multipartConfig.minimumPartSizeInBytes() != null )
|
||||
builder.minimumPartSizeInBytes(multipartConfig.minimumPartSizeInBytes())
|
||||
if( multipartConfig.thresholdInBytes() != null )
|
||||
builder.thresholdInBytes(multipartConfig.thresholdInBytes())
|
||||
}
|
||||
|
||||
protected AwsCredentialsProvider getCredentialsProvider0() {
|
||||
if( accessKey && secretKey ) {
|
||||
return StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))
|
||||
}
|
||||
|
||||
if( profile ) {
|
||||
return ProfileCredentialsProvider.builder()
|
||||
.profileName(profile)
|
||||
.build()
|
||||
}
|
||||
|
||||
return DefaultCredentialsProvider.create()
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,437 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.TimeoutException
|
||||
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import software.amazon.awssdk.services.batch.model.BatchException
|
||||
import software.amazon.awssdk.services.ecs.model.AccessDeniedException
|
||||
import software.amazon.awssdk.services.cloudwatchlogs.model.ResourceNotFoundException
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.transform.PackageScope
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.cloud.aws.AwsClientFactory
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import nextflow.cloud.types.CloudMachineInfo
|
||||
import nextflow.exception.AbortOperationException
|
||||
import nextflow.executor.Executor
|
||||
import nextflow.executor.TaskArrayExecutor
|
||||
import nextflow.extension.FilesEx
|
||||
import nextflow.fusion.FusionHelper
|
||||
import nextflow.processor.ParallelPollingMonitor
|
||||
import nextflow.processor.TaskHandler
|
||||
import nextflow.processor.TaskMonitor
|
||||
import nextflow.processor.TaskRun
|
||||
import nextflow.util.Duration
|
||||
import nextflow.util.Escape
|
||||
import nextflow.util.RateUnit
|
||||
import nextflow.util.ServiceName
|
||||
import nextflow.util.ThreadPoolHelper
|
||||
import nextflow.util.ThrottlingExecutor
|
||||
import org.pf4j.ExtensionPoint
|
||||
/**
 * AWS Batch executor
 * https://aws.amazon.com/batch/
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@ServiceName('awsbatch')
@CompileStatic
class AwsBatchExecutor extends Executor implements ExtensionPoint, TaskArrayExecutor {

    /**
     * Proxy to throttle AWS batch client requests
     */
    @PackageScope
    private AwsBatchProxy client

    /** Helper class to resolve Batch related metadata */
    private AwsBatchHelper helper

    /**
     * executor service to throttle service requests
     */
    private ThrottlingExecutor submitter

    /**
     * Executor service to throttle cancel requests
     */
    private ThrottlingExecutor reaper

    /**
     * A S3 path where executable scripts need to be uploaded
     */
    private Path remoteBinDir = null

    private AwsOptions awsOptions

    /**
     * Ids of the jobs for which a deletion has already been requested.
     * All accesses must be made while holding the monitor of this set.
     */
    private final Set<String> deletedJobs = new HashSet<>()

    AwsOptions getAwsOptions() { awsOptions }

    /**
     * @return {@code true} to signal containers are managed directly the AWS Batch service
     */
    @Override
    final boolean isContainerNative() {
        return true
    }

    @Override
    String containerConfigEngine() {
        return 'docker'
    }

    /**
     * @return {@code true} whenever the secrets handling is managed by the executing platform itself
     */
    @Override
    final boolean isSecretNative() {
        return true
    }

    /**
     * @return The session bucket directory when defined, otherwise the session work directory
     */
    @Override
    Path getWorkDir() {
        session.bucketDir ?: session.workDir
    }

    /**
     * Make sure the work dir is a S3 bucket, aborting the session otherwise.
     *
     * @throws AbortOperationException when the work directory is not a {@link S3Path}
     */
    protected void validateWorkDir() {
        if( !(workDir instanceof S3Path) ) {
            session.abort()
            throw new AbortOperationException("When using `$name` executor an S3 bucket must be provided as working directory using either the `-bucket-dir` or `-work-dir` command line option")
        }
    }

    /**
     * Warn the user when a {@code env.PATH} setting is found in the configuration.
     */
    protected void validatePathDir() {
        def path = session.config.navigate('env.PATH')
        if( path ) {
            log.warn "Environment PATH defined in config file is ignored by AWS Batch executor"
        }
    }

    /**
     * Upload the local {@code bin} scripts to the executor temporary directory, unless
     * there is nothing to upload or the remote bin directory is disabled in the session.
     */
    protected void uploadBinDir() {
        if( session.binDir && !session.binDir.empty() && !session.disableRemoteBinDir ) {
            def s3 = getTempDir()
            log.info "Uploading local `bin` scripts folder to ${s3.toUriString()}/bin"
            remoteBinDir = FilesEx.copyTo(session.binDir, s3)
        }
    }

    /**
     * Retrieve config and credentials and create the AWS client, the metadata
     * helper and the options object.
     */
    protected void createAwsClient() {
        final driver = new AwsClientFactory(new AwsConfig(session.config.aws as Map))

        // create a proxy for the aws batch client that manages the request throttling
        client = new AwsBatchProxy(driver.getBatchClient(), submitter)
        helper = new AwsBatchHelper(client, driver)
        // create the options object
        awsOptions = new AwsOptions(this)
        log.debug "[AWS BATCH] Executor ${awsOptions.fargateMode ? '(FARGATE mode) ' : ''}options=$awsOptions"
    }

    /**
     * Initialise the AWS batch executor.
     */
    @Override
    protected void register() {
        super.register()
        validateWorkDir()
        validatePathDir()
        uploadBinDir()
        createAwsClient()
    }

    @PackageScope
    Path getRemoteBinDir() {
        remoteBinDir
    }

    @PackageScope
    BatchClient getClient() {
        client
    }

    /**
     * @return The monitor instance that handles AWS batch tasks
     */
    @Override
    protected TaskMonitor createTaskMonitor() {

        // create the throttling executors used to submit and to cancel jobs
        submitter = createExecutorService('AWSBatch-executor')

        reaper = createExecutorService('AWSBatch-reaper')

        final pollInterval = config.getPollInterval(name, Duration.of('10 sec'))
        final dumpInterval = config.getMonitorDumpInterval(name)
        final capacity = config.getQueueSize(name, 1000)

        final def params = [
                name: name,
                session: session,
                config: config,
                pollInterval: pollInterval,
                dumpInterval: dumpInterval,
                capacity: capacity
        ]

        log.debug "Creating parallel monitor for executor '$name' > pollInterval=$pollInterval; dumpInterval=$dumpInterval"
        new ParallelPollingMonitor(submitter, params)
    }

    /**
     * Create a task handler for the given task instance
     *
     * @param task The {@link TaskRun} instance to be executed
     * @return A {@link AwsBatchTaskHandler} for the given task
     */
    @Override
    TaskHandler createTaskHandler(TaskRun task) {
        assert task
        assert task.workDir
        log.trace "[AWS BATCH] Launching process > ${task.name} -- work folder: ${task.workDirStr}"
        new AwsBatchTaskHandler(task, this)
    }

    /** HTTP status codes for which a failed Batch request is retried */
    private static final List<Integer> RETRYABLE_STATUS = [429, 500, 502, 503, 504]

    /**
     * Creates a {@link ThrottlingExecutor} service to throttle
     * the API requests to the AWS Batch service.
     *
     * @param name The name assigned to the thread pool
     * @return The new {@link ThrottlingExecutor} instance
     */
    private ThrottlingExecutor createExecutorService(String name) {

        final qs = 5_000
        // the rate limit is an executor setting: it must be looked up with the executor
        // name, not with the thread pool name held by the `name` parameter
        final limit = config.getExecConfigProp(getName(), 'submitRateLimit', '50/s') as String
        final size = Runtime.runtime.availableProcessors() * 5

        final opts = new ThrottlingExecutor.Options()
                .retryOn { Throwable t -> t instanceof BatchException && (t.awsErrorDetails().errorCode() == 'TooManyRequestsException' || t.statusCode() in RETRYABLE_STATUS) }
                .onFailure { Throwable t -> session?.abort(t) }
                .onRateLimitChange { RateUnit rate -> logRateLimitChange(rate) }
                .withRateLimit(limit)
                .withQueueSize(qs)
                .withPoolSize(size)
                .withKeepAlive(Duration.of('1 min'))
                .withAutoThrottle(true)
                .withMaxRetries(10)
                .withPoolName(name)

        ThrottlingExecutor.create(opts)
    }

    @Override
    boolean isFusionEnabled() {
        return FusionHelper.isFusionEnabled(session)
    }

    protected void logRateLimitChange(RateUnit rate) {
        log.debug "New submission rate limit: $rate"
    }

    @PackageScope
    ThrottlingExecutor getReaper() { reaper }

    /**
     * Check whether a delete request should be issued for the given job,
     * recording the job id so that the deletion is requested only once.
     *
     * @param jobId The AWS Batch job id
     * @return {@code true} the first time it is invoked for a given job id, {@code false} afterwards
     */
    boolean shouldDeleteJob(String jobId) {
        // the set is a plain HashSet: both the lookup and the update must happen
        // under the same lock to be safe when invoked by concurrent threads
        synchronized (deletedJobs) {
            // `add` returns true only when the id was not yet in the set,
            // therefore the job should be deleted
            final result = deletedJobs.add(jobId)
            if( result )
                log.debug "[AWS BATCH] cleanup = should delete job $jobId: $result"
            else
                log.debug "[AWS BATCH] cleanup = already deleted job $jobId"
            return result
        }
    }

    /**
     * Retrieve the cloud machine info for the given queue and task.
     *
     * @param queue The Batch job queue name
     * @param taskArn The ARN of the task
     * @return The {@link CloudMachineInfo} or {@code null} when it cannot be retrieved
     */
    CloudMachineInfo getMachineInfoByQueueAndTaskArn(String queue, String taskArn) {
        try {
            return helper?.getCloudInfoByQueueAndTaskArn(queue, taskArn)
        }
        catch ( AccessDeniedException e ) {
            log.warn "Unable to retrieve AWS Batch instance type | ${e.message}"
            // disable it since user has not permission to access this info
            awsOptions.fetchInstanceType = false
            return null
        }
        catch( Exception e ) {
            log.warn "Unable to retrieve AWS batch instance type for queue=$queue; task=$taskArn | ${e.message}", e
            return null
        }
    }

    /**
     * Retrieve the log stream of the given job.
     *
     * @param jobId The AWS Batch job id
     * @return The job log or {@code null} when it cannot be found or retrieved
     */
    String getJobOutputStream(String jobId) {
        try {
            return helper.getTaskLogStream(jobId, awsOptions.getLogsGroup())
        }
        catch (ResourceNotFoundException e) {
            log.debug "Unable to find AWS Cloudwatch logs for Batch Job id=$jobId - ${e.message}"
        }
        catch (Exception e) {
            log.debug "Unable to retrieve AWS Cloudwatch logs for Batch Job id=$jobId | ${e.message}", e
        }
        return null
    }

    /**
     * Stop the submitter pool, then shut down the reaper pool and wait for it to complete.
     */
    @Override
    void shutdown() {
        def tasks = submitter.shutdownNow()
        if( tasks ) log.warn "Execution interrupted -- cleaning up execution pool"
        submitter.awaitTermination(5, TimeUnit.MINUTES)
        // -- finally delete cleanup executor
        // start shutdown process
        reaper.shutdown()
        final waitMsg = "[AWS BATCH] Waiting jobs reaper to complete (%d jobs to be terminated)"
        final exitMsg = "[AWS BATCH] Exiting before jobs reaper thread pool complete -- Some jobs may not be terminated"
        awaitCompletion(reaper, Duration.of('60min'), waitMsg, exitMsg)
    }

    /**
     * Await the completion of the given executor, logging a warning when the wait times out.
     */
    protected void awaitCompletion(ThrottlingExecutor executor, Duration duration, String waitMsg, String exitMsg) {
        try {
            ThreadPoolHelper.await(executor, duration, waitMsg, exitMsg)
        }
        catch( TimeoutException e ) {
            log.warn(e.message, e)
        }
    }

    @Override
    String getArrayIndexName() { 'AWS_BATCH_JOB_ARRAY_INDEX' }

    @Override
    int getArrayIndexStart() { 0 }

    @Override
    String getArrayTaskId(String jobId, int index) {
        return "${jobId}:${index}"
    }

    @Override
    String getArrayLaunchCommand(String taskDir) {
        if( isFusionEnabled() || isWorkDirDefaultFS() )
            return TaskArrayExecutor.super.getArrayLaunchCommand(taskDir)
        else
            return Escape.cli(getLaunchCommand(taskDir) as String[])
    }

    /**
     * Build the command line launching the task stored in the given S3 work directory,
     * using {@code s5cmd} when its path is configured, the AWS CLI otherwise.
     *
     * @param s3WorkDir The task work directory
     * @return The launch command as a list of arguments
     */
    List<String> getLaunchCommand(String s3WorkDir) {
        // the cmd list to launch it
        final opts = getAwsOptions()
        final cmd = opts.s5cmdPath
                ? s5Cmd(s3WorkDir, opts)
                : s3Cmd(s3WorkDir, opts)
        return ['bash','-o','pipefail','-c', cmd.toString()]
    }

    /**
     * Build the bash command that runs a task using the AWS CLI.
     *
     * Enhanced signal handling for AWS Batch tasks to fix nested Nextflow execution issues.
     * References: https://github.com/nextflow-io/nextflow/pull/6414
     *
     * The command is made of three parts:
     *
     * 1. TERM trap: forwards a SIGTERM received by the parent shell to the background
     *    bash process whose id is held by the `pid` variable. The `-n` test makes sure
     *    the kill is attempted only when `pid` is set and non-empty.
     *
     * 2. EXIT trap: saves the exit status, uploads the command log file to S3 and then
     *    exits with the saved status. The `||true` prevents a failed upload from
     *    replacing the original exit status.
     *
     * 3. Background execution: the task script is streamed from S3 into `bash` running
     *    in background, with stdout and stderr sent through `tee` to the log file.
     *    The background process id is stored in `pid` and the parent shell waits for it,
     *    remaining responsive to signals while the task executes.
     *
     * @param workDir The task work directory
     * @param opts The AWS options providing the CLI path and the transfer flags
     * @return The bash command string
     */
    static String s3Cmd(String workDir, AwsOptions opts) {
        final cli = opts.getAwsCli()
        final debug = opts.debug ? ' --debug' : ''
        final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
        final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''
        final requesterPays = opts.requesterPays ? ' --request-payer requester' : ''
        final aws = "$cli s3 cp --only-show-errors${sse}${kms}${debug}${requesterPays}"

        final cmd = "trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM; trap \"{ ret=\$?; $aws ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT; $aws ${workDir}/${TaskRun.CMD_RUN} - | bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid"
        return cmd
    }

    /**
     * Build the bash command that runs a task using s5cmd instead of the AWS CLI.
     *
     * It follows the same pattern as the {@code s3Cmd} method
     * (references: https://github.com/nextflow-io/nextflow/pull/6414):
     *
     * 1. TERM trap: forwards a SIGTERM to the background process.
     * 2. EXIT trap: uploads the task log with `s5cmd cp` preserving the exit status.
     * 3. Background execution: `s5cmd cat` streams the task script into `bash`,
     *    whose output goes through `tee` to the log file while the parent shell waits.
     *
     * @param workDir The task work directory
     * @param opts The AWS options providing the s5cmd path and the transfer flags
     * @return The bash command string
     */
    static String s5Cmd(String workDir, AwsOptions opts) {
        final cli = opts.getS5cmdPath()
        final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
        final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''
        final requesterPays = opts.requesterPays ? ' --request-payer requester' : ''

        final cmd = "trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM; trap \"{ ret=\$?; $cli cp${sse}${kms}${requesterPays} ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT; $cli cat ${workDir}/${TaskRun.CMD_RUN} | bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid"
        return cmd
    }

}
|
||||
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.cloud.aws.util.S3BashLib
|
||||
import nextflow.executor.SimpleFileCopyStrategy
|
||||
import nextflow.processor.TaskBean
|
||||
import nextflow.util.Escape
|
||||
|
||||
/**
|
||||
* Defines the script operation to handle file when running in the Cirrus cluster
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@Slf4j
|
||||
@CompileStatic
|
||||
class AwsBatchFileCopyStrategy extends SimpleFileCopyStrategy {
|
||||
|
||||
private AwsOptions opts
|
||||
|
||||
private Map<String,String> environment
|
||||
|
||||
AwsBatchFileCopyStrategy(TaskBean task, AwsOptions opts ) {
|
||||
super(task)
|
||||
this.opts = opts
|
||||
this.environment = task.environment
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A script snippet that download from S3 the task scripts:
|
||||
* {@code .command.env}, {@code .command.sh}, {@code .command.in},
|
||||
* etc.
|
||||
*/
|
||||
String getBeforeStartScript() {
|
||||
S3BashLib.script(opts)
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String getEnvScript(Map environment, boolean container) {
|
||||
if( container )
|
||||
throw new IllegalArgumentException("Parameter `container` not supported by ${this.class.simpleName}")
|
||||
|
||||
final result = new StringBuilder()
|
||||
final copy = environment ? new LinkedHashMap<String,String>(environment) : Collections.<String,String>emptyMap()
|
||||
final path = copy.containsKey('PATH')
|
||||
// remove any external PATH
|
||||
if( path )
|
||||
copy.remove('PATH')
|
||||
// when a remote bin directory is provide managed it properly
|
||||
if( opts.remoteBinDir ) {
|
||||
result << "${opts.getAwsCli()} s3 cp --recursive --only-show-errors s3:/${opts.remoteBinDir} \$PWD/nextflow-bin\n"
|
||||
result << "chmod +x \$PWD/nextflow-bin/* || true\n"
|
||||
result << "export PATH=\$PWD/nextflow-bin:\$PATH\n"
|
||||
}
|
||||
// finally render the environment
|
||||
final envSnippet = super.getEnvScript(copy,false)
|
||||
if( envSnippet )
|
||||
result << envSnippet
|
||||
return result.toString()
|
||||
}
|
||||
|
||||
@Override
|
||||
String getStageInputFilesScript(Map<String,Path> inputFiles) {
|
||||
def result = 'downloads=(true)\n'
|
||||
result += super.getStageInputFilesScript(inputFiles) + '\n'
|
||||
result += 'nxf_parallel "${downloads[@]}"\n'
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String stageInputFile( Path path, String targetName ) {
|
||||
// third param should not be escaped, because it's used in the grep match rule
|
||||
def stage_cmd = opts.maxTransferAttempts > 1 && !opts.retryMode
|
||||
? "downloads+=(\"nxf_cp_retry nxf_s3_download s3:/${Escape.path(path)} ${Escape.path(targetName)}\")"
|
||||
: "downloads+=(\"nxf_s3_download s3:/${Escape.path(path)} ${Escape.path(targetName)}\")"
|
||||
return stage_cmd
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String getUnstageOutputFilesScript(List<String> outputFiles, Path targetDir) {
|
||||
|
||||
final patterns = normalizeGlobStarPaths(outputFiles)
|
||||
// create a bash script that will copy the out file to the working directory
|
||||
log.trace "[AWS BATCH] Unstaging file path: $patterns"
|
||||
|
||||
if( !patterns )
|
||||
return null
|
||||
|
||||
final escape = new ArrayList(outputFiles.size())
|
||||
for( String it : patterns )
|
||||
escape.add( Escape.path(it) )
|
||||
|
||||
return """\
|
||||
uploads=()
|
||||
IFS=\$'\\n'
|
||||
for name in \$(eval "ls -1d ${escape.join(' ')}" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '\$name' s3:/${Escape.path(targetDir)}")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "\${uploads[@]}"
|
||||
""".stripIndent(true)
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String touchFile( Path file ) {
|
||||
"echo start | nxf_s3_upload - s3:/${Escape.path(file)}"
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
String fileStr( Path path ) {
|
||||
Escape.path(path.getFileName())
|
||||
}
|
||||
|
||||
/**
 * {@inheritDoc}
 *
 * Returns the {@code nxf_s3_upload} command that uploads {@code name} to the
 * S3 location of the parent directory of {@code target}.
 */
@Override
String copyFile( String name, Path target ) {
    final source = Escape.path(name)
    final destination = Escape.path(target.getParent())
    return "nxf_s3_upload ${source} s3:/${destination}"
}
|
||||
|
||||
/**
 * Composes the {@code nxf_s3_upload} command line for the given source and target.
 *
 * @param source the item to upload; escaped before being embedded in the command
 * @param target the destination path, rendered with the {@code s3:/} prefix
 * @return the upload command line
 */
static String uploadCmd( String source, Path target ) {
    final from = Escape.path(source)
    final to = Escape.path(target)
    return "nxf_s3_upload ${from} s3:/${to}"
}
|
||||
|
||||
/**
 * {@inheritDoc}
 *
 * Returns a command suffix that pipes into {@code nxf_s3_upload} for the S3
 * location of {@code path}; the trailing {@code || true} makes a failed
 * upload non-fatal for the enclosing command.
 */
String exitFile( Path path ) {
    final target = Escape.path(path)
    return "| nxf_s3_upload - s3:/${target} || true"
}
|
||||
|
||||
/**
 * {@inheritDoc}
 *
 * Returns the stdin redirection for the (escaped) file name of {@code path}.
 */
@Override
String pipeInputFile( Path path ) {
    final name = Escape.path(path.getFileName())
    return " < ${name}"
}
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.transform.Memoized
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.cloud.aws.AwsClientFactory
|
||||
import nextflow.cloud.types.CloudMachineInfo
|
||||
import nextflow.cloud.types.PriceModel
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import software.amazon.awssdk.services.batch.model.DescribeComputeEnvironmentsRequest
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobQueuesRequest
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobsRequest
|
||||
import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient
|
||||
import software.amazon.awssdk.services.cloudwatchlogs.model.GetLogEventsRequest
|
||||
import software.amazon.awssdk.services.cloudwatchlogs.model.OutputLogEvent
|
||||
import software.amazon.awssdk.services.ec2.Ec2Client
|
||||
import software.amazon.awssdk.services.ec2.model.DescribeInstancesRequest
|
||||
import software.amazon.awssdk.services.ec2.model.Instance
|
||||
import software.amazon.awssdk.services.ec2.model.InstanceLifecycleType
|
||||
import software.amazon.awssdk.services.ecs.EcsClient
|
||||
import software.amazon.awssdk.services.ecs.model.DescribeContainerInstancesRequest
|
||||
import software.amazon.awssdk.services.ecs.model.DescribeTasksRequest
|
||||
import software.amazon.awssdk.services.ecs.model.InvalidParameterException
|
||||
/**
|
||||
* Helper class to resolve Batch related metadata
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@Slf4j
|
||||
@CompileStatic
|
||||
class AwsBatchHelper {
|
||||
|
||||
private AwsClientFactory factory
|
||||
private BatchClient batchClient
|
||||
|
||||
/**
 * Creates the helper.
 *
 * @param batchClient the client used for the AWS Batch API calls
 * @param factory the factory from which the ECS, EC2 and CloudWatch Logs clients are obtained
 */
AwsBatchHelper(BatchClient batchClient, AwsClientFactory factory) {
    this.factory = factory
    this.batchClient = batchClient
}
|
||||
|
||||
// ECS client obtained from the factory (memoized)
@Memoized
private EcsClient getEcsClient() {
    factory.getEcsClient()
}
|
||||
|
||||
// EC2 client obtained from the factory (memoized)
@Memoized
private Ec2Client getEc2Client() {
    factory.getEc2Client()
}
|
||||
|
||||
// CloudWatch Logs client obtained from the factory (memoized)
@Memoized
private CloudWatchLogsClient getLogsClient() {
    factory.getLogsClient()
}
|
||||
|
||||
/**
 * Resolves the ECS cluster ARNs associated with a Batch job queue: the queue's
 * compute environments are looked up first, then their cluster ARNs.
 * Results are memoized (up to 100 entries).
 */
@Memoized(maxCacheSize = 100)
private List<String> getClusterArnByBatchQueue(String queueName) {
    return getClusterArnByCompEnvNames(getComputeEnvByQueueName(queueName))
}
|
||||
|
||||
/**
 * Resolves the ECS cluster ARNs backing the given Batch compute environments.
 *
 * @param envNames the compute environments to describe
 * @return the {@code ecsClusterArn} of each described environment, or an empty
 *      list when {@code envNames} is null or empty
 */
private List<String> getClusterArnByCompEnvNames(List<String> envNames) {
    // Without any name there is nothing to resolve. Skipping the request also avoids
    // sending an empty filter, which the service presumably treats as "no filter"
    // and would answer with unrelated compute environments - TODO confirm
    if( !envNames )
        return Collections.<String>emptyList()

    final request = DescribeComputeEnvironmentsRequest.builder()
            .computeEnvironments(envNames)
            .build() as DescribeComputeEnvironmentsRequest
    return batchClient
            .describeComputeEnvironments(request)
            .computeEnvironments()*.ecsClusterArn()
}
|
||||
|
||||
/**
 * Lists the compute environments referenced by the compute environment order
 * of the job queue(s) returned for {@code queueName}.
 */
private List<String> getComputeEnvByQueueName(String queueName) {
    final request = DescribeJobQueuesRequest.builder()
            .jobQueues(queueName)
            .build() as DescribeJobQueuesRequest

    final envNames = new ArrayList<String>(10)
    for( final queue : batchClient.describeJobQueues(request).jobQueues() ) {
        for( final entry : queue.computeEnvironmentOrder() )
            envNames << entry.computeEnvironment()
    }
    return envNames
}
|
||||
|
||||
/**
 * Resolves the machine info for a task by walking task -> container instance
 * -> EC2 instance. Returns {@code null} as soon as one step yields nothing.
 */
private CloudMachineInfo getInfoByClusterAndTaskArn(String clusterArn, String taskArn) {
    final containerId = getContainerIdByClusterAndTaskArn(clusterArn, taskArn)
    if( !containerId )
        return null
    final instanceId = getInstanceIdByClusterAndContainerId(clusterArn, containerId)
    if( !instanceId )
        return null
    return getInfoByInstanceId(instanceId)
}
|
||||
|
||||
/**
 * Looks up the container instance ARN of an ECS task.
 *
 * @return the container instance ARN, or {@code null} when the describe call
 *      returns no task or is rejected with {@link InvalidParameterException}
 * @throws IllegalStateException when more than one entry is returned for the task
 */
private String getContainerIdByClusterAndTaskArn(String clusterArn, String taskArn) {
    final request = DescribeTasksRequest.builder()
            .cluster(clusterArn)
            .tasks(taskArn)
            .build() as DescribeTasksRequest
    try {
        final arns = ecsClient.describeTasks(request).tasks()*.containerInstanceArn()
        final count = arns.size()
        if( count == 1 )
            return arns.get(0)
        if( count != 0 )
            throw new IllegalStateException("Found more than one container for taskArn=$taskArn")
        log.debug "Unable to find container id for clusterArn=$clusterArn and taskArn=$taskArn"
        return null
    }
    catch (InvalidParameterException e) {
        log.debug "Cannot find container id for clusterArn=$clusterArn and taskArn=$taskArn - The task is likely running on another cluster"
        return null
    }
}
|
||||
|
||||
/**
 * Looks up the EC2 instance id of an ECS container instance.
 *
 * @return the EC2 instance id, or {@code null} when none is returned
 * @throws IllegalStateException when more than one instance id is returned
 */
private String getInstanceIdByClusterAndContainerId(String clusterArn, String containerId) {
    final request = DescribeContainerInstancesRequest.builder()
            .cluster(clusterArn)
            .containerInstances(containerId)
            .build() as DescribeContainerInstancesRequest
    final ids = ecsClient
            .describeContainerInstances(request)
            .containerInstances()*.ec2InstanceId()
    if( !ids ) {
        log.debug "Unable to find EC2 instance id for clusterArn=$clusterArn and containerId=$containerId"
        return null
    }
    if( ids.size() != 1 )
        throw new IllegalStateException("Found more than one EC2 instance for containerId=$containerId")
    return ids.get(0)
}
|
||||
|
||||
/**
 * Describes an EC2 instance and maps it to a {@link CloudMachineInfo} holding
 * its instance type, availability zone and price model. Only the first
 * instance of the first reservation is considered. Results are memoized
 * (up to 1000 entries).
 *
 * @return the machine info, or {@code null} when no instance is returned
 */
@Memoized(maxCacheSize = 1_000)
private CloudMachineInfo getInfoByInstanceId(String instanceId) {
    assert instanceId
    final request = DescribeInstancesRequest.builder()
            .instanceIds(instanceId)
            .build() as DescribeInstancesRequest
    final reservation = ec2Client.describeInstances(request).reservations() [0]
    Instance instance = null
    if( reservation )
        instance = reservation.instances() [0]
    if( instance ) {
        return new CloudMachineInfo(
                getInstanceType(instance),
                instance.placement().availabilityZone(),
                getPrice(instance))
    }
    log.debug "Unable to find cloud machine info for instanceId=$instanceId"
    return null
}
|
||||
|
||||
/**
 * @return the instance type of {@code instance} as a string, or {@code null}
 *      when no instance is given
 */
protected String getInstanceType(Instance instance) {
    if( !instance )
        return null
    return instance.instanceTypeAsString()
}
|
||||
|
||||
/**
 * Maps the instance lifecycle to a price model: {@code spot} for spot
 * instances, {@code standard} for anything else.
 */
private PriceModel getPrice(Instance instance) {
    final isSpot = instance.instanceLifecycle() == InstanceLifecycleType.SPOT
    return isSpot ? PriceModel.spot : PriceModel.standard
}
|
||||
|
||||
/**
 * Finds the machine info of the instance running the given ECS task, trying
 * in turn each cluster associated with the Batch queue.
 *
 * @param queue the Batch job queue name
 * @param taskArn the ECS task ARN
 * @return the info from the first cluster that yields a result, or {@code null} when none does
 */
CloudMachineInfo getCloudInfoByQueueAndTaskArn(String queue, String taskArn) {
    CloudMachineInfo found = null
    final clusters = getClusterArnByBatchQueue(queue).iterator()
    // stop at the first cluster for which the task can be resolved
    while( !found && clusters.hasNext() ) {
        found = getInfoByClusterAndTaskArn(clusters.next(), taskArn)
    }
    if( found )
        return found

    log.debug "Unable to find cloud info for queue=$queue and taskArn=$taskArn"
    return null
}
|
||||
|
||||
/**
 * Finds the CloudWatch log stream name of a Batch job.
 *
 * @param jobId the Batch job ID
 * @return the log stream name reported in the job container details, or
 *      {@code null} when the job is not found or it carries no container details
 */
protected String getLogStreamId(String jobId) {
    final request = DescribeJobsRequest.builder()
            .jobs(jobId)
            .build() as DescribeJobsRequest
    final jobs = batchClient.describeJobs(request).jobs()
    if( !jobs ) {
        log.debug "Unable to find info for batch job id=$jobId"
        return null
    }
    // the container details may be missing from the job description; use safe
    // navigation so the caller gets `null` instead of a NullPointerException
    return jobs[0].container()?.logStreamName()
}
|
||||
|
||||
/**
 * Retrieves the CloudWatch log of the specified AWS Batch job as a single text.
 *
 * @param jobId
 *      The Batch job ID whose log is requested
 * @param groupName
 *      The CloudWatch log group name; when empty {@code /aws/batch/job} is used
 * @return
 *      The log event messages, each followed by a newline, or {@code null} when no log
 *      stream is found for the job. Note, only one {@code getLogEvents} request is made,
 *      so when the log is made of multiple pages only the first one is returned
 */
String getTaskLogStream(String jobId, String groupName) {
    final streamId = getLogStreamId(jobId)
    if( !streamId ) {
        log.debug "Unable to find CloudWatch log stream for batch job id=$jobId"
        return null
    }

    final request = GetLogEventsRequest.builder()
            .logGroupName(groupName ?: "/aws/batch/job")
            .logStreamName(streamId)
            .build() as GetLogEventsRequest
    final events = logsClient.getLogEvents(request).events()

    final text = new StringBuilder()
    for( OutputLogEvent event : events ) {
        text.append(event.message())
        text.append('\n')
    }
    return text.toString()
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import nextflow.util.ClientProxyThrottler
|
||||
import nextflow.util.ThrottlingExecutor
|
||||
/**
 * AWS Batch client proxy that handles all API invocations
 * through the provided executor service.
 *
 * WARN: the caller class/method should not be compile static
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsBatchProxy extends ClientProxyThrottler<BatchClient> {

    /** The wrapped client; its API is exposed on the proxy via delegation */
    @Delegate(deprecated=true)
    private BatchClient target

    /**
     * @param client the Batch client to wrap
     * @param executor the executor through which the invocations are submitted
     */
    AwsBatchProxy(BatchClient client, ThrottlingExecutor executor) {
        // note: use higher priority for `describeJobs` invocations
        super(client, executor, [describeJobs: 10 as Byte])
        target = client
    }

}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.container.ContainerHelper
|
||||
import nextflow.executor.BashWrapperBuilder
|
||||
import nextflow.processor.TaskBean
|
||||
import nextflow.processor.TaskRun
|
||||
|
||||
/**
 * BASH launcher script builder for AWS Batch jobs. It uses
 * {@link AwsBatchFileCopyStrategy} as file copy strategy and registers the
 * task control files as task inputs.
 */
@CompileStatic
class AwsBatchScriptLauncher extends BashWrapperBuilder {

    AwsBatchScriptLauncher(TaskBean bean, AwsOptions opts ) {
        super(bean, new AwsBatchFileCopyStrategy(bean,opts))
        // enable the copying of output file to the S3 work dir
        if( scratch==null )
            scratch = true
        // include task script as an input to force its staging in the container work directory
        stageFromWorkDir(bean, TaskRun.CMD_SCRIPT)
        // add the wrapper file when stats are enabled
        // NOTE: this must match the logic that uses the run script in BashWrapperBuilder
        if( isTraceRequired() )
            stageFromWorkDir(bean, TaskRun.CMD_RUN)
        // include task stdin file
        if( bean.input != null )
            stageFromWorkDir(bean, TaskRun.CMD_INFILE)
    }

    /**
     * Registers the work-dir file {@code name} among the task input files
     */
    private static void stageFromWorkDir(TaskBean bean, String name) {
        bean.inputFiles[name] = bean.workDir.resolve(name)
    }

    @Override
    protected boolean fixOwnership() {
        ContainerHelper.fixOwnership(containerConfig)
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import nextflow.cloud.aws.batch.model.ContainerPropertiesModel
|
||||
import software.amazon.awssdk.services.batch.model.KeyValuePair
|
||||
import software.amazon.awssdk.services.batch.model.LinuxParameters
|
||||
import software.amazon.awssdk.services.batch.model.Tmpfs
|
||||
import software.amazon.awssdk.services.batch.model.Ulimit
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.util.CmdLineOptionMap
|
||||
import nextflow.util.MemoryUnit
|
||||
|
||||
/**
|
||||
* Maps task container options to AWS container properties
|
||||
*
|
||||
* @see https://docs.docker.com/engine/reference/commandline/run/
|
||||
* @see https://docs.aws.amazon.com/batch/latest/APIReference/API_ContainerProperties.html
|
||||
*
|
||||
* @author Manuele Simi <manuele.simi@gmail.com>
|
||||
*/
|
||||
@CompileStatic
|
||||
class AwsContainerOptionsMapper {
|
||||
|
||||
/**
 * Deprecated alias that forwards to {@link #createContainerProperties(CmdLineOptionMap)}.
 */
@Deprecated
static ContainerPropertiesModel createContainerOpts(CmdLineOptionMap options) {
    createContainerProperties(options)
}
|
||||
|
||||
/**
 * Creates the container properties model for the given container options.
 *
 * @param options the parsed container options; may be {@code null}
 * @return a model populated from the options, or an empty model when no option is given
 */
static ContainerPropertiesModel createContainerProperties(CmdLineOptionMap options) {
    final model = new ContainerPropertiesModel()
    if ( !options?.hasOptions() )
        return model

    checkPrivileged(options, model)
    checkEnvVars(options, model)
    checkUser(options, model)
    checkReadOnly(options, model)
    checkUlimit(options, model)
    final LinuxParameters linux = checkLinuxParameters(options)
    if ( linux != null )
        model.linuxParameters(linux)
    return model
}
|
||||
|
||||
/**
 * Enables the privileged mode when the {@code privileged} flag is set.
 */
protected static void checkPrivileged(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
    final enabled = findOptionWithBooleanValue(options, 'privileged')
    if ( enabled ) {
        containerProperties.privileged(true)
    }
}
|
||||
|
||||
/**
 * Maps the {@code --env} and {@code -e} options to the container environment.
 *
 * Each entry has the form {@code NAME} or {@code NAME=VALUE}. The entry is
 * split at the first {@code =} only, so a value may itself contain
 * {@code =} characters. A missing or empty value is mapped to {@code null}.
 *
 * @param options the parsed container options
 * @param containerProperties the model receiving the environment; left untouched when no variable is given
 */
protected static void checkEnvVars(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
    // copy into a new list: the lists handed out by the option map must not be modified
    final List<String> values = new ArrayList<String>(findOptionWithMultipleValues(options, 'env'))
    values.addAll(findOptionWithMultipleValues(options, 'e'))

    final keyValuePairs = new ArrayList<KeyValuePair>(values.size())
    for( String entry : values ) {
        final int sep = entry.indexOf('=')
        final String name = sep < 0 ? entry : entry.substring(0, sep)
        // keep everything after the first `=`; an empty value is reported as null
        final String value = sep < 0 ? null : (entry.substring(sep + 1) ?: null)
        keyValuePairs << KeyValuePair.builder().name(name).value(value).build()
    }
    if ( keyValuePairs )
        containerProperties.environment(keyValuePairs)
}
|
||||
|
||||
/**
 * Sets the container user from the {@code -u} option or, when that is not
 * given, from the {@code --user} option.
 */
protected static void checkUser(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
    final String user = findOptionWithSingleValue(options, 'u') ?: findOptionWithSingleValue(options, 'user')
    if ( user ) {
        containerProperties.user(user)
    }
}
|
||||
|
||||
/**
 * Marks the root filesystem as read-only when the {@code read-only} flag is set.
 */
protected static void checkReadOnly(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
    final readOnly = findOptionWithBooleanValue(options, 'read-only')
    if ( readOnly ) {
        containerProperties.readonlyRootFilesystem(true)
    }
}
|
||||
|
||||
/**
 * Maps the {@code --ulimit} options to the container ulimits.
 *
 * Each value has the form {@code name=soft} or {@code name=soft:hard}.
 *
 * @param options the parsed container options
 * @param containerProperties the model receiving the ulimits; left untouched when no ulimit is given
 * @throws IllegalArgumentException when a value has no {@code =} separator or no limit after it
 */
protected static void checkUlimit(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
    final ulimits = new ArrayList<Ulimit>()
    for( String value : findOptionWithMultipleValues(options, 'ulimit') ) {
        final tokens = value.tokenize('=')
        // reject malformed values explicitly instead of failing with a NullPointerException
        final limits = tokens.size() > 1 ? tokens[1].tokenize(':') : Collections.<String>emptyList()
        if( !limits )
            throw new IllegalArgumentException("Found a malformed value '${value}' for --ulimit option")

        final limit = Ulimit.builder()
                .name(tokens[0])
                .softLimit(limits[0] as Integer)
        // the hard limit is optional
        if ( limits.size() > 1 )
            limit.hardLimit(limits[1] as Integer)
        ulimits << limit.build()
    }
    if ( ulimits )
        containerProperties.ulimits(ulimits)
}
|
||||
|
||||
/**
 * Maps the Linux specific options ({@code --shm-size}, {@code --tmpfs},
 * {@code --memory-swap}, {@code --init} and {@code --memory-swappiness}) to a
 * {@link LinuxParameters} object.
 *
 * @param options the parsed container options
 * @return the Linux parameters, or {@code null} when none of the options above is given
 * @throws IllegalArgumentException when a {@code --tmpfs} value does not have
 *      the form {@code path:options,size=N}
 */
protected static LinuxParameters checkLinuxParameters(CmdLineOptionMap options) {
    final builder = LinuxParameters.builder()
    boolean anySet = false

    // shared memory size, converted to mega units
    final shmSize = findOptionWithSingleValue(options, 'shm-size')
    if ( shmSize ) {
        builder.sharedMemorySize(MemoryUnit.of(shmSize).mega as Integer)
        anySet = true
    }

    // tmpfs mounts, e.g --tmpfs /run:rw,noexec,nosuid,size=64
    final mounts = new ArrayList<Tmpfs>()
    for( String spec : findOptionWithMultipleValues(options, 'tmpfs') ) {
        final parsed = spec =~ /^(?<path>.*):(?<options>.*?),size=(?<sizeMiB>.*)$/
        if ( !parsed.matches() )
            throw new IllegalArgumentException("Found a malformed value '${spec}' for --tmpfs option")
        mounts << Tmpfs.builder()
                .containerPath(parsed.group('path'))
                .size(parsed.group('sizeMiB') as Integer)
                .mountOptions(parsed.group('options').tokenize(','))
                .build()
    }
    if ( mounts ) {
        builder.tmpfs(mounts)
        anySet = true
    }

    // swap limit equal to memory plus swap
    final maxSwap = findOptionWithSingleValue(options, 'memory-swap')
    if ( maxSwap ) {
        builder.maxSwap(maxSwap as Integer)
        anySet = true
    }

    // run an init inside the container
    if ( findOptionWithBooleanValue(options, 'init') ) {
        builder.initProcessEnabled(true)
        anySet = true
    }

    // tune container memory swappiness
    final swappiness = findOptionWithSingleValue(options, 'memory-swappiness')
    if ( swappiness ) {
        builder.swappiness(swappiness as Integer)
        anySet = true
    }

    if ( !anySet )
        return null
    return builder.build()
}
|
||||
|
||||
/**
 * Finds the value of a single-valued option.
 *
 * @param options the parsed container options
 * @param name the name of the option
 * @return the first value of the option as a string, or {@code null} when the option is not given
 */
protected static String findOptionWithSingleValue(CmdLineOptionMap options, String name) {
    final value = options.getFirstValueOrDefault(name,null)
    return value as String
}
|
||||
|
||||
/**
 * Finds the values of an option that can be repeated.
 *
 * @param options the parsed container options
 * @param name the name of the option
 * @return the list of values as returned by the option map
 */
protected static List<String> findOptionWithMultipleValues(CmdLineOptionMap options, String name) {
    return options.getValues(name)
}
|
||||
|
||||
/**
 * Checks whether a boolean flag is given.
 *
 * @param options the parsed container options
 * @param name the name of the flag
 * @return {@code false} when the flag does not exist, otherwise its first value coerced to a boolean
 */
protected static boolean findOptionWithBooleanValue(CmdLineOptionMap options, String name) {
    final present = options.exists(name)
    return present ? options.getFirstValue(name) as Boolean : false
}
|
||||
}
|
||||
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.transform.EqualsAndHashCode
|
||||
import groovy.transform.ToString
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.CloudTransferOptions
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.util.Duration
|
||||
import nextflow.util.TestOnly
|
||||
/**
 * Read-only view over the AWS configuration options required for Batch job
 * executions. Nearly all accessors delegate to the Batch or S3 section of the
 * wrapped {@link AwsConfig}.
 */
@Slf4j
@ToString(includeNames = true, includePackage = false)
@EqualsAndHashCode
@CompileStatic
class AwsOptions implements CloudTransferOptions {

    public static final List<String> VALID_RETRY_MODES = ['legacy','standard','adaptive']

    private AwsConfig awsConfig

    String remoteBinDir

    volatile Boolean fetchInstanceType

    /** Creates an instance backed by an empty AWS configuration */
    @TestOnly
    protected AwsOptions() {
        awsConfig = new AwsConfig(Collections.emptyMap())
    }

    /** Creates the options from the executor session and takes the executor remote bin directory */
    AwsOptions( AwsBatchExecutor executor ) {
        this(executor.session)
        remoteBinDir = executor.getRemoteBinDir()
    }

    /**
     * Creates the options from the session configuration. When
     * {@code aws.batch.fetchInstanceType} is not set, the {@code tower.enabled}
     * setting (default {@code false}) is used in its place.
     */
    @Deprecated
    AwsOptions(Session session) {
        final config = session.config
        this.awsConfig = new AwsConfig(config.aws as Map ?: Collections.emptyMap())
        this.fetchInstanceType = config.navigate('aws.batch.fetchInstanceType')
        if( this.fetchInstanceType == null )
            this.fetchInstanceType = config.navigate('tower.enabled',false)
    }

    // -- general settings

    String getRegion() {
        awsConfig.getRegion()
    }

    // -- Batch settings

    String getJobRole() {
        awsConfig.batchConfig.jobRole
    }

    String getLogsGroup() {
        awsConfig.batchConfig.logsGroup
    }

    String getRetryMode() {
        awsConfig.batchConfig.retryMode
    }

    String getShareIdentifier() {
        awsConfig.batchConfig.shareIdentifier
    }

    Integer getSchedulingPriority() {
        awsConfig.batchConfig.schedulingPriority
    }

    String getCliPath() {
        awsConfig.batchConfig.cliPath
    }

    /** @return an unmodifiable view of the configured volumes; empty when none is configured */
    List<String> getVolumes() {
        final volumes = awsConfig.batchConfig.getVolumes()
        if( volumes == null )
            return Collections.<String>emptyList()
        return Collections.unmodifiableList(volumes)
    }

    int getMaxParallelTransfers() {
        awsConfig.batchConfig.maxParallelTransfers
    }

    int getMaxTransferAttempts() {
        awsConfig.batchConfig.maxTransferAttempts
    }

    Integer getMaxSpotAttempts() {
        awsConfig.batchConfig.maxSpotAttempts
    }

    Duration getDelayBetweenAttempts() {
        awsConfig.batchConfig.delayBetweenAttempts
    }

    // -- S3 settings

    String getStorageClass() {
        awsConfig.s3Config.storageClass
    }

    String getStorageEncryption() {
        awsConfig.s3Config.storageEncryption
    }

    String getStorageKmsKeyId() {
        awsConfig.s3Config.storageKmsKeyId
    }

    ObjectCannedACL getS3Acl() {
        awsConfig.s3Config.getS3Acl()
    }

    Boolean getDebug() {
        awsConfig.s3Config.debug
    }

    Boolean getRequesterPays() {
        awsConfig.s3Config.requesterPays
    }

    /**
     * @return the AWS CLI command: the configured CLI path, or {@code aws} when
     *      none is set, followed by the {@code --region} option when a region is configured
     */
    String getAwsCli() {
        final cli = getCliPath() ?: 'aws'
        return region ? cli + " --region $region" : cli
    }

    /** Adds a volume to the Batch configuration and returns this object for chaining */
    AwsOptions addVolume(Path path) {
        awsConfig.batchConfig.addVolume(path)
        this
    }

    boolean isFargateMode() {
        awsConfig.batchConfig.fargateMode
    }

    String getS5cmdPath() {
        awsConfig.batchConfig.s5cmdPath
    }

    String getExecutionRole() {
        awsConfig.batchConfig.executionRole
    }

    boolean terminateUnschedulableJobs() {
        awsConfig.batchConfig.terminateUnschedulableJobs
    }

    Boolean getForceGlacierTransfer() {
        awsConfig.batchConfig.forceGlacierTransfer
    }

}
|
||||
@@ -0,0 +1,287 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch.model
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import software.amazon.awssdk.services.batch.model.ContainerProperties
|
||||
import software.amazon.awssdk.services.batch.model.EphemeralStorage
|
||||
import software.amazon.awssdk.services.batch.model.KeyValuePair
|
||||
import software.amazon.awssdk.services.batch.model.LinuxParameters
|
||||
import software.amazon.awssdk.services.batch.model.LogConfiguration
|
||||
import software.amazon.awssdk.services.batch.model.MountPoint
|
||||
import software.amazon.awssdk.services.batch.model.NetworkConfiguration
|
||||
import software.amazon.awssdk.services.batch.model.ResourceRequirement
|
||||
import software.amazon.awssdk.services.batch.model.RuntimePlatform
|
||||
import software.amazon.awssdk.services.batch.model.Secret
|
||||
import software.amazon.awssdk.services.batch.model.Ulimit
|
||||
import software.amazon.awssdk.services.batch.model.Volume
|
||||
|
||||
/**
|
||||
* Models the container properties used to configure an AWS Batch job.
|
||||
*
|
||||
* This is a mutable version of {@link ContainerProperties} required
|
||||
* to simplify the extension of container settings in the AWS Batch executor
|
||||
* and its sub-classes (e.g. nf-xpack).
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@CompileStatic
|
||||
class ContainerPropertiesModel {
|
||||
|
||||
private String image
|
||||
|
||||
private List<String> command
|
||||
|
||||
private List<ResourceRequirement> resourceRequirements
|
||||
|
||||
private String jobRoleArn
|
||||
|
||||
private String executionRoleArn
|
||||
|
||||
private LinuxParameters linuxParameters
|
||||
|
||||
private ArrayList<KeyValuePair> environment
|
||||
|
||||
private boolean privileged
|
||||
|
||||
private String user
|
||||
|
||||
private boolean readonlyRootFilesystem
|
||||
|
||||
private ArrayList<Ulimit> ulimits
|
||||
|
||||
private LogConfiguration logConfiguration
|
||||
|
||||
private List<MountPoint> mountPoints
|
||||
|
||||
private List<Volume> volumes
|
||||
|
||||
private NetworkConfiguration networkConfiguration
|
||||
|
||||
private EphemeralStorage ephemeralStorage
|
||||
|
||||
private RuntimePlatform runtimePlatform
|
||||
|
||||
private List<Secret> secrets
|
||||
|
||||
/** Sets the container image; returns this model for chaining. */
ContainerPropertiesModel image(String value) {
    this.image = value
    this
}
|
||||
|
||||
/** Sets the container command from the given arguments, stored as a list; returns this model for chaining. */
ContainerPropertiesModel command(String... value) {
    final List<String> args = value as List<String>
    this.command = args
    this
}
|
||||
|
||||
/** Sets the resource requirements from the given entries, stored as a list; returns this model for chaining. */
ContainerPropertiesModel resourceRequirements(ResourceRequirement... value) {
    final List<ResourceRequirement> requirements = value as List<ResourceRequirement>
    this.resourceRequirements = requirements
    this
}
|
||||
|
||||
/** Sets the job role ARN; returns this model for chaining. */
ContainerPropertiesModel jobRoleArn(String value) {
    this.jobRoleArn = value
    this
}
|
||||
|
||||
/** Sets the execution role ARN; returns this model for chaining. */
ContainerPropertiesModel executionRoleArn(String value) {
    this.executionRoleArn = value
    this
}
|
||||
|
||||
/** Sets the container user; returns this model for chaining. */
ContainerPropertiesModel user(String user) {
    this.user = user
    this
}
|
||||
|
||||
/** Sets the read-only root filesystem flag; returns this model for chaining. */
ContainerPropertiesModel readonlyRootFilesystem(boolean value) {
    this.readonlyRootFilesystem = value
    this
}
|
||||
|
||||
/** Sets the environment variables; returns this model for chaining. */
ContainerPropertiesModel environment(ArrayList<KeyValuePair> value) {
    this.environment = value
    this
}
|
||||
|
||||
/** Sets the Linux parameters; returns this model for chaining. */
ContainerPropertiesModel linuxParameters(LinuxParameters value) {
    this.linuxParameters = value
    this
}
|
||||
|
||||
/** Sets the privileged flag; returns this model for chaining. */
ContainerPropertiesModel privileged(boolean value) {
    this.privileged = value
    this
}
|
||||
|
||||
/** Sets the ulimits; returns this model for chaining. */
ContainerPropertiesModel ulimits(ArrayList<Ulimit> value) {
    this.ulimits = value
    this
}
|
||||
|
||||
/** Sets the log configuration; returns this model for chaining. */
ContainerPropertiesModel logConfiguration(LogConfiguration value) {
    this.logConfiguration = value
    this
}
|
||||
|
||||
/** Sets the mount points; returns this model for chaining. */
ContainerPropertiesModel mountPoints(List<MountPoint> value) {
    final List<MountPoint> points = value as List<MountPoint>
    this.mountPoints = points
    this
}
|
||||
|
||||
/** Sets the volumes; returns this model for chaining. */
ContainerPropertiesModel volumes(List<Volume> value) {
    final List<Volume> items = value as List<Volume>
    this.volumes = items
    this
}
|
||||
|
||||
/** Sets the network configuration; returns this model for chaining. */
ContainerPropertiesModel networkConfiguration(NetworkConfiguration value) {
    this.networkConfiguration = value
    this
}
|
||||
|
||||
/** Sets the ephemeral storage; returns this model for chaining. */
ContainerPropertiesModel ephemeralStorage(EphemeralStorage value) {
    this.ephemeralStorage = value
    this
}
|
||||
|
||||
/** Sets the runtime platform; returns this model for chaining. */
ContainerPropertiesModel runtimePlatform(RuntimePlatform value) {
    this.runtimePlatform = value
    this
}
|
||||
|
||||
/** Sets the secrets; returns this model for chaining. */
ContainerPropertiesModel secrets(List<Secret> value) {
    this.secrets = value
    this
}
|
||||
|
||||
/** @return the Linux parameters, or {@code null} when not set */
LinuxParameters getLinuxParameters() {
    this.linuxParameters
}
|
||||
|
||||
/** @return the environment variables, or {@code null} when not set */
ArrayList<KeyValuePair> getEnvironment() {
    this.environment
}
|
||||
|
||||
/** @return the privileged flag */
boolean getPrivileged() {
    this.privileged
}
|
||||
|
||||
/** @return the container user, or {@code null} when not set */
String getUser() {
    this.user
}
|
||||
|
||||
/** @return the read-only root filesystem flag */
boolean getReadonlyRootFilesystem() {
    this.readonlyRootFilesystem
}
|
||||
|
||||
/** @return the ulimits, or {@code null} when not set */
ArrayList<Ulimit> getUlimits() {
    this.ulimits
}
|
||||
|
||||
/** @return the container image, or {@code null} when not set */
String getImage() {
    this.image
}
|
||||
|
||||
/** @return the container command, or {@code null} when not set */
List<String> getCommand() {
    this.command
}
|
||||
|
||||
/** @return the resource requirements, or {@code null} when not set */
List<ResourceRequirement> getResourceRequirements() {
    this.resourceRequirements
}
|
||||
|
||||
/** @return the job role ARN, or {@code null} when not set */
String getJobRoleArn() {
    this.jobRoleArn
}
|
||||
|
||||
/** @return the execution role ARN, or {@code null} when not set */
String getExecutionRoleArn() {
    this.executionRoleArn
}
|
||||
|
||||
/** @return the log configuration, or {@code null} when not set */
LogConfiguration getLogConfiguration() {
    this.logConfiguration
}
|
||||
|
||||
/** @return the mount points, or {@code null} when not set */
List<MountPoint> getMountPoints() {
    this.mountPoints
}
|
||||
|
||||
/** @return the volumes, or {@code null} when not set */
List<Volume> getVolumes() {
    this.volumes
}
|
||||
|
||||
/** @return the network configuration, or {@code null} when not set */
NetworkConfiguration getNetworkConfiguration() {
    this.networkConfiguration
}
|
||||
|
||||
/** @return the ephemeral storage, or {@code null} when not set */
EphemeralStorage getEphemeralStorage() {
    this.ephemeralStorage
}
|
||||
|
||||
/** @return the runtime platform, or {@code null} when not set */
RuntimePlatform getRuntimePlatform() {
    this.runtimePlatform
}
|
||||
|
||||
/**
 * Converts this model to a {@link ContainerProperties} object.
 *
 * Only the attributes holding a "truthy" value are copied: null values,
 * empty strings and lists, and {@code false} flags are left unset on the builder.
 *
 * @return the container properties built from the attributes that are set
 */
ContainerProperties toBatchContainerProperties() {
    final builder = ContainerProperties.builder()

    // image, command and resources
    if (image) {
        builder.image(image)
    }
    if (command) {
        builder.command(command)
    }
    if (resourceRequirements) {
        builder.resourceRequirements(resourceRequirements)
    }

    // roles
    if (jobRoleArn) {
        builder.jobRoleArn(jobRoleArn)
    }
    if (executionRoleArn) {
        builder.executionRoleArn(executionRoleArn)
    }

    // container runtime settings
    if (linuxParameters) {
        builder.linuxParameters(linuxParameters)
    }
    if (environment) {
        builder.environment(environment)
    }
    if (privileged) {
        builder.privileged(privileged)
    }
    if (user) {
        builder.user(user)
    }
    if (readonlyRootFilesystem) {
        builder.readonlyRootFilesystem(readonlyRootFilesystem)
    }
    if (ulimits) {
        builder.ulimits(ulimits)
    }

    // logging, storage, network and platform
    if (logConfiguration) {
        builder.logConfiguration(logConfiguration)
    }
    if (mountPoints) {
        builder.mountPoints(mountPoints)
    }
    if (volumes) {
        builder.volumes(volumes)
    }
    if (networkConfiguration) {
        builder.networkConfiguration(networkConfiguration)
    }
    if (ephemeralStorage) {
        builder.ephemeralStorage(ephemeralStorage)
    }
    if (runtimePlatform) {
        builder.runtimePlatform(runtimePlatform)
    }
    if (secrets) {
        builder.secrets(secrets)
    }

    return builder.build()
}
|
||||
|
||||
/**
 * Renders every field of this model as {@code name=value} pairs for logging and debugging.
 *
 * @return the textual dump of this model
 */
@Override
String toString() {
    // plain `+` concatenation is kept on purpose so that values are formatted exactly as before
    final text = "ContainerPropertiesModel{" +
        "image='" + image + '\'' +
        ", command=" + command +
        ", resourceRequirements=" + resourceRequirements +
        ", jobRoleArn='" + jobRoleArn + '\'' +
        ", executionRoleArn='" + executionRoleArn + '\'' +
        ", linuxParameters=" + linuxParameters +
        ", environment=" + environment +
        ", privileged=" + privileged +
        ", user='" + user + '\'' +
        ", readonlyRootFilesystem=" + readonlyRootFilesystem +
        ", ulimits=" + ulimits +
        ", logConfiguration=" + logConfiguration +
        ", mountPoints=" + mountPoints +
        ", volumes=" + volumes +
        ", networkConfiguration=" + networkConfiguration +
        ", ephemeralStorage=" + ephemeralStorage +
        ", runtimePlatform=" + runtimePlatform +
        ", secrets=" + secrets +
        '}'
    return text
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch.model
|
||||
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import software.amazon.awssdk.services.batch.model.ConsumableResourceProperties
|
||||
import software.amazon.awssdk.services.batch.model.JobDefinitionType
|
||||
import software.amazon.awssdk.services.batch.model.PlatformCapability
|
||||
import software.amazon.awssdk.services.batch.model.RegisterJobDefinitionRequest
|
||||
|
||||
/**
 * Mutable counterpart of {@link RegisterJobDefinitionRequest}.
 *
 * The job definition settings are collected in plain fields so that the AWS Batch
 * executor and its sub-classes (e.g. nf-xpack) can adjust them, and are converted
 * to the immutable AWS SDK request only when {@link #toBatchRequest()} is invoked.
 */
@CompileStatic
class RegisterJobDefinitionModel {

    private String jobDefinitionName
    private JobDefinitionType type
    private List<PlatformCapability> platformCapabilities
    private ContainerPropertiesModel containerProperties
    private Map<String,String> parameters
    private Map<String,String> tags
    private ConsumableResourceProperties consumableResourceProperties

    // -- fluent setters: each one stores the given value as-is and returns this model

    /** Sets the job definition name. */
    RegisterJobDefinitionModel jobDefinitionName(String value) {
        this.jobDefinitionName = value
        this
    }

    /** Sets the job definition type. */
    RegisterJobDefinitionModel type(JobDefinitionType value) {
        this.type = value
        this
    }

    /** Sets the platform capabilities list (kept by reference). */
    RegisterJobDefinitionModel platformCapabilities(List<PlatformCapability> value) {
        this.platformCapabilities = value
        this
    }

    /** Sets the mutable container properties model. */
    RegisterJobDefinitionModel containerProperties(ContainerPropertiesModel value) {
        this.containerProperties = value
        this
    }

    /** Sets the parameters map (kept by reference). */
    RegisterJobDefinitionModel parameters(Map<String,String> value) {
        this.parameters = value
        this
    }

    /** Sets the tags map (kept by reference). */
    RegisterJobDefinitionModel tags(Map<String,String> value) {
        this.tags = value
        this
    }

    /**
     * Adds a single tag, creating the tags map first when none has been set yet.
     *
     * @param key the tag name
     * @param value the tag value
     * @return this model, so that calls can be chained
     */
    RegisterJobDefinitionModel addTagsEntry(String key, String value) {
        // only a missing map is replaced; an existing (even empty) map is written to directly
        if( tags == null ) {
            tags = new LinkedHashMap<>()
        }
        tags.put(key, value)
        this
    }

    /** Sets the consumable resource properties. */
    RegisterJobDefinitionModel consumableResourceProperties(ConsumableResourceProperties value) {
        this.consumableResourceProperties = value
        this
    }

    // -- getters: each one returns the stored value without copying

    String getJobDefinitionName() {
        jobDefinitionName
    }

    JobDefinitionType getType() {
        type
    }

    List<PlatformCapability> getPlatformCapabilities() {
        platformCapabilities
    }

    ContainerPropertiesModel getContainerProperties() {
        containerProperties
    }

    Map<String, String> getParameters() {
        parameters
    }

    Map<String, String> getTags() {
        tags
    }

    ConsumableResourceProperties getConsumableResourceProperties() {
        consumableResourceProperties
    }

    /**
     * Converts this model into an AWS SDK {@link RegisterJobDefinitionRequest}.
     *
     * A field is copied to the SDK builder only when it evaluates to true under Groovy
     * truth, therefore null values, empty strings and empty collections are left unset.
     * The container properties model is converted via its own
     * {@code toBatchContainerProperties()} method.
     *
     * @return the request built from the populated fields of this model
     */
    RegisterJobDefinitionRequest toBatchRequest() {
        final request = RegisterJobDefinitionRequest.builder()

        if( jobDefinitionName ) {
            request.jobDefinitionName(jobDefinitionName)
        }
        if( type ) {
            request.type(type)
        }
        if( platformCapabilities ) {
            request.platformCapabilities(platformCapabilities)
        }
        if( containerProperties ) {
            request.containerProperties(containerProperties.toBatchContainerProperties())
        }
        if( consumableResourceProperties ) {
            request.consumableResourceProperties(consumableResourceProperties)
        }
        if( parameters ) {
            request.parameters(parameters)
        }
        if( tags ) {
            request.tags(tags)
        }

        return (RegisterJobDefinitionRequest) request.build()
    }

    /**
     * Renders every field of this model as {@code name=value} pairs for logging and debugging.
     */
    @Override
    String toString() {
        // plain `+` concatenation is kept on purpose so that values are formatted exactly as before
        final text = "RegisterJobDefinitionModel{" +
            "jobDefinitionName='" + jobDefinitionName + '\'' +
            ", type=" + type +
            ", platformCapabilities=" + platformCapabilities +
            ", containerProperties=" + containerProperties +
            ", parameters=" + parameters +
            ", tags=" + tags +
            ", consumableResourceProperties=" + consumableResourceProperties +
            '}'
        return text
    }
}
|
||||
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.CloudTransferOptions
|
||||
import nextflow.cloud.aws.batch.AwsOptions
|
||||
import nextflow.config.spec.ConfigOption
|
||||
import nextflow.config.spec.ConfigScope
|
||||
import nextflow.script.dsl.Description
|
||||
import nextflow.exception.ProcessUnrecoverableException
|
||||
import nextflow.util.Duration
|
||||
|
||||
/**
 * Model AWS Batch config settings
 *
 * The constructor reads the settings from a plain options map and applies the
 * defaults; `@Description` texts are kept flush-left so their content is not
 * altered by source indentation.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@CompileStatic
class AwsBatchConfig implements CloudTransferOptions, ConfigScope {

    /** Fallback for `maxTransferAttempts` when neither the config nor `AWS_MAX_ATTEMPTS` provides a value */
    public static final int DEFAULT_AWS_MAX_ATTEMPTS = 5

    @ConfigOption
    @Description("""
The path where the AWS command line tool is installed in the host AMI.
""")
    final String cliPath

    @ConfigOption
    @Description("""
Delay between download attempts from S3 (default: `10 sec`).
""")
    final Duration delayBetweenAttempts

    @ConfigOption
    @Description("""
The AWS Batch [Execution Role](https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html) ARN that needs to be used to execute the Batch Job. It is mandatory when using AWS Fargate.
""")
    final String executionRole

    @ConfigOption
    @Description("""
When `true`, add the `--force-glacier-transfer` flag to AWS CLI S3 download commands (default: `false`).
""")
    final boolean forceGlacierTransfer

    @ConfigOption
    @Description("""
The AWS Batch Job Role ARN that needs to be used to execute the Batch Job.
""")
    final String jobRole

    @ConfigOption
    @Description("""
The name of the logs group used by Batch Jobs (default: `/aws/batch/job`).
""")
    final String logsGroup

    @ConfigOption
    @Description("""
Max parallel upload/download transfer operations *per job* (default: `4`).
""")
    final int maxParallelTransfers

    @ConfigOption
    @Description("""
Max number of execution attempts of a job interrupted by a EC2 Spot reclaim event (default: `0`)
""")
    final Integer maxSpotAttempts

    // NOTE: the documented default matches `defaultMaxTransferAttempts()` below
    @ConfigOption
    @Description("""
Max number of downloads attempts from S3 (default: `5`, or the value of the `AWS_MAX_ATTEMPTS` environment variable when set).
""")
    final int maxTransferAttempts

    @ConfigOption
    @Description("""
The compute platform type used by AWS Batch. Can be either `ec2` or `fargate`. Set to `fargate` to use [AWS Fargate](https://docs.aws.amazon.com/batch/latest/userguide/fargate.html).
""")
    final String platformType

    @ConfigOption
    @Description("""
The [retry mode](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html) used to handle rate-limiting by AWS APIs. Can be one of `standard`, `legacy`, `adaptive`, or `built-in` (default: `standard`).
""")
    final String retryMode

    @ConfigOption
    @Description("""
The scheduling priority for all tasks when using [fair-share scheduling](https://aws.amazon.com/blogs/hpc/introducing-fair-share-scheduling-for-aws-batch/) (default: `0`).
""")
    final Integer schedulingPriority

    @ConfigOption
    @Description("""
The share identifier for all tasks when using [fair-share scheduling](https://aws.amazon.com/blogs/hpc/introducing-fair-share-scheduling-for-aws-batch/).
""")
    final String shareIdentifier

    @ConfigOption
    @Description("""
When `true`, jobs that cannot be scheduled due to lack of resources or misconfiguration are terminated and handled as task failures (default: `false`).
""")
    final boolean terminateUnschedulableJobs

    @ConfigOption
    @Description("""
List of container mounts. Mounts can be specified as simple e.g. `/some/path` or canonical format e.g. `/host/path:/mount/path[:ro|rw]`.
""")
    final List<String> volumes

    /**
     * The path for the `s5cmd` tool as an alternative to `aws s3` CLI to upload/download files
     */
    String s5cmdPath

    /**
     * Whether it should use Fargate API
     */
    boolean fargateMode

    /**
     * Creates the config object from the given options map, applying the default values.
     *
     * In Fargate mode the `cliPath` option is interpreted as the `s5cmd` path and
     * `cliPath` itself is left null; otherwise `s5cmdPath` is left null.
     *
     * @param opts the `aws.batch` options; must not be null
     * @throws ProcessUnrecoverableException when `cliPath` is not a valid AWS CLI path
     * @throws IllegalArgumentException when `volumes` is neither a list nor a string
     */
    AwsBatchConfig(Map opts) {
        // keep the declared `platformType` option in sync with the configured value
        // (it was previously never assigned and therefore always null)
        platformType = opts.platformType as String
        fargateMode = opts.platformType == 'fargate'
        cliPath = !fargateMode ? parseCliPath(opts.cliPath as String) : null
        s5cmdPath = fargateMode ? parses5cmdPath(opts.cliPath as String) : null
        maxParallelTransfers = opts.maxParallelTransfers as Integer ?: MAX_TRANSFER
        maxTransferAttempts = opts.maxTransferAttempts as Integer ?: defaultMaxTransferAttempts()
        delayBetweenAttempts = opts.delayBetweenAttempts as Duration ?: DEFAULT_DELAY_BETWEEN_ATTEMPTS
        maxSpotAttempts = opts.maxSpotAttempts!=null ? opts.maxSpotAttempts as Integer : null
        volumes = makeVols(opts.volumes)
        jobRole = opts.jobRole
        logsGroup = opts.logsGroup
        retryMode = opts.retryMode ?: 'standard'
        shareIdentifier = opts.shareIdentifier
        schedulingPriority = opts.schedulingPriority as Integer ?: 0
        executionRole = opts.executionRole
        terminateUnschedulableJobs = opts.terminateUnschedulableJobs as boolean
        forceGlacierTransfer = opts.forceGlacierTransfer as boolean
        if( retryMode == 'built-in' )
            retryMode = null // this force falling back on NF built-in retry mode instead of delegating to AWS CLI tool
        // an unknown retry mode is only reported, the value is kept as provided
        if( retryMode && retryMode !in AwsOptions.VALID_RETRY_MODES )
            log.warn "Unexpected value for 'aws.batch.retryMode' config setting - offending value: $retryMode - valid values: ${AwsOptions.VALID_RETRY_MODES.join(',')}"
    }

    /**
     * @return the value of the `AWS_MAX_ATTEMPTS` environment variable when defined,
     *      otherwise {@link #DEFAULT_AWS_MAX_ATTEMPTS}
     */
    protected int defaultMaxTransferAttempts() {
        final env = SysEnv.get()
        return env.AWS_MAX_ATTEMPTS ? env.AWS_MAX_ATTEMPTS as int : DEFAULT_AWS_MAX_ATTEMPTS
    }

    /**
     * Validates the AWS CLI path.
     *
     * @param value the configured `cliPath`, may be null or empty
     * @return the path as given, or null when it is empty or refers to `s5cmd`
     * @throws ProcessUnrecoverableException when the path is not absolute or
     *      does not end with `/bin/aws`
     */
    private String parseCliPath(String value) {
        if( !value )
            return null
        // a path naming `s5cmd` is not an AWS CLI path, hence it is ignored here
        if( value.tokenize('/ ').contains('s5cmd') )
            return null
        if( !value.startsWith('/') )
            throw new ProcessUnrecoverableException("Not a valid aws-cli tools path: $value -- it must be an absolute path")
        if( !value.endsWith('/bin/aws'))
            throw new ProcessUnrecoverableException("Not a valid aws-cli tools path: $value -- it must end with the `/bin/aws` suffix")
        return value
    }

    /**
     * Converts the `volumes` option into a list of normalised mount strings.
     *
     * @param obj a list of values, a comma-separated string, or a null/empty value
     * @return the normalised volumes; a new empty list when no value is given
     * @throws IllegalArgumentException when the value is of an unsupported type
     */
    protected List<String> makeVols(obj) {
        if( !obj )
            return new ArrayList<String>(10)
        if( obj instanceof List )
            return ((List)obj).collect { normPath0(it.toString()) }
        if( obj instanceof CharSequence )
            return obj.toString().tokenize(',').collect { normPath0(it) }
        throw new IllegalArgumentException("Not a valid `aws.batch.volumes` value: $obj [${obj.getClass().getName()}]")
    }

    /**
     * Trims the given string and strips its trailing slashes, never reducing it
     * below one character (so `/` is preserved).
     */
    protected String normPath0(String it) {
        def result = it.trim()
        while( result.endsWith('/') && result.size()>1 )
            result = result.substring(0,result.size()-1)
        return result
    }

    /**
     * Adds the given path to the volumes list unless it is already included.
     *
     * @param path the path to mount; its scheme is asserted to be `file`
     * @return this config object
     */
    AwsBatchConfig addVolume(Path path) {
        assert path.scheme == 'file'
        def location = path.toString()
        if( !volumes.contains(location) )
            volumes.add(location)
        return this
    }

    /**
     * Resolves the `s5cmd` command path.
     *
     * @param value the configured `cliPath`, may be null or empty
     * @return the given value when one of its `/` or blank separated tokens is
     *      `s5cmd`, otherwise the plain `s5cmd` command name
     */
    protected String parses5cmdPath(String value) {
        return value && value.tokenize('/ ').contains('s5cmd') ? value : 's5cmd'
    }
}
|
||||
@@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
|
||||
import software.amazon.awssdk.regions.Region
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.Global
|
||||
import nextflow.SysEnv
|
||||
import nextflow.config.spec.ConfigOption
|
||||
import nextflow.config.spec.ConfigScope
|
||||
import nextflow.config.spec.ScopeName
|
||||
import nextflow.script.dsl.Description
|
||||
import nextflow.util.IniFile
|
||||
/**
 * Model AWS cloud configuration settings
 *
 * `@Description` texts are kept flush-left so their content is not altered
 * by source indentation.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@ScopeName("aws")
@Description("""
The `aws` scope controls the interactions with AWS, including AWS Batch and S3.
""")
@Slf4j
@CompileStatic
class AwsConfig implements ConfigScope {

    final AwsBatchConfig batch

    final AwsS3Config client

    @ConfigOption
    @Description("""
AWS region (e.g. `us-east-1`).
""")
    final String region

    @ConfigOption
    @Description("""
AWS account access key.
""")
    final String accessKey

    @ConfigOption
    @Description("""
AWS account secret key.
""")
    final String secretKey

    @ConfigOption
    @Description("""
AWS profile from `~/.aws/credentials`.
""")
    final String profile

    /* required by extension point -- do not remove */
    AwsConfig() {}

    /**
     * Creates the config object from the given `aws` options map.
     *
     * The profile and region fall back to the process environment (and, for the
     * region, to the `~/.aws/config` file) when not given in the options.
     *
     * @param opts the `aws` scope options; must not be null
     */
    AwsConfig(Map opts) {
        this.accessKey = opts.accessKey
        this.secretKey = opts.secretKey
        this.profile = getAwsProfile0(SysEnv.get(), opts)
        this.region = getAwsRegion(SysEnv.get(), opts)
        this.batch = new AwsBatchConfig((Map)opts.batch ?: Collections.emptyMap())
        this.client = new AwsS3Config((Map)opts.client ?: Collections.emptyMap())
    }

    /**
     * @return a two-element list holding the access and secret key when both
     *      are defined, otherwise an empty list
     */
    List<String> getCredentials() {
        return accessKey && secretKey
            ? List.of(accessKey, secretKey)
            : Collections.<String>emptyList()
    }

    AwsS3Config getS3Config() { client }

    AwsBatchConfig getBatchConfig() { batch }

    /**
     * @return `us-east-1` when no region is set, no endpoint is set or the endpoint
     *      is an `.amazonaws.com` one; the configured region otherwise
     */
    @Deprecated
    String getS3GlobalRegion() {
        return !region || !s3Config.endpoint || s3Config.endpoint.contains(".amazonaws.com")
            ? Region.US_EAST_1.id()     // always use US_EAST_1 as global region for AWS endpoints
            : region                    // for custom endpoint use the config provided region
    }

    /**
     * Resolves the region used for S3 evaluating the region resolved from config and a possible region defined in the endpoint.
     * Fallback to the global region US_EAST_1 when no region is found.
     *
     * Preference:
     *  1. endpoint region
     *  2. config region
     *  3. US_EAST_1
     *
     * @return Resolved region.
     **/
    String resolveS3Region() {
        final epRegion = client.getEndpointRegion()
        return epRegion ?: this.region ?: Region.US_EAST_1.id()
    }

    /**
     * Resolves the AWS profile name.
     *
     * @param env the environment variables, may be null
     * @param config the `aws` options, may be null
     * @return the `profile` option when set, otherwise the value of `AWS_PROFILE`
     *      or `AWS_DEFAULT_PROFILE` (in this order) when the variable is present,
     *      otherwise null
     */
    static protected String getAwsProfile0(Map env, Map<String,Object> config) {

        final profile = config?.profile as String
        if( profile )
            return profile

        if( env?.containsKey('AWS_PROFILE'))
            return env.get('AWS_PROFILE')

        if( env?.containsKey('AWS_DEFAULT_PROFILE'))
            return env.get('AWS_DEFAULT_PROFILE')

        return null
    }

    /**
     * Resolves the AWS region using the `.aws/config` file located in the
     * directory given by the `user.home` system property.
     *
     * @see #getAwsRegion0(Map, Map, Path)
     */
    static protected String getAwsRegion(Map env, Map config) {

        def home = Paths.get(System.properties.get('user.home') as String)
        def file = home.resolve('.aws/config')

        return getAwsRegion0(env, config, file)
    }

    /**
     * Resolves the AWS region, in order of precedence, from:
     *  1. the `region` config option
     *  2. the `AWS_DEFAULT_REGION` environment variable
     *  3. the `region` entry of the resolved profile (or `default`) in the given AWS config file
     *
     * @param env the environment variables, may be null
     * @param config the `aws` options, may be null
     * @param awsFile the AWS config file; ignored when it does not exist
     * @return the resolved region, or null when none is found
     */
    static protected String getAwsRegion0(Map env, Map config, Path awsFile) {
        // check nxf config file
        if( config instanceof Map ) {
            def region = config.region
            if( region )
                return region.toString()
        }

        if( env && env.AWS_DEFAULT_REGION )  {
            return env.AWS_DEFAULT_REGION.toString()
        }

        if( !awsFile.exists() ) {
            return null
        }

        final profile = getAwsProfile0(env, config) ?: 'default'
        final ini = new IniFile(awsFile)
        // plain `[name]` section first, as it has always been looked up
        def region = ini.section(profile)?.region
        // the AWS config file declares non-default profiles as `[profile name]`,
        // therefore fall back on that header when the plain one holds no region
        if( !region && profile != 'default' )
            region = ini.section('profile ' + profile)?.region
        return region
    }

    /**
     * @return a new map holding the S3 client legacy properties, including the
     *      `max_error_retry` default applied by {@link #checkDefaultErrorRetry(Map, Map)}
     */
    Map getS3LegacyProperties() {
        final result = new LinkedHashMap(20)

        // -- remaining client config options
        def config = client.getAwsClientConfig()
        config = checkDefaultErrorRetry(config, SysEnv.get())
        if( config ) {
            result.putAll(config)
        }

        log.debug "AWS S3 config properties: ${dumpAwsConfig(result)}"
        return result
    }

    /**
     * Makes sure the `max_error_retry` entry is defined, as a string value.
     *
     * @param result the client properties; a new map is created when null
     * @param env the environment variables, may be null
     * @return the given (or newly created) map, with `max_error_retry` taken from
     *      the map itself, or `AWS_MAX_ATTEMPTS`, or the `'5'` default
     */
    static protected Map checkDefaultErrorRetry(Map result, Map env) {
        if( result == null )
            result = new HashMap(10)

        if( result.max_error_retry==null ) {
            result.max_error_retry = env?.AWS_MAX_ATTEMPTS
        }
        // fallback to default
        if( result.max_error_retry==null ) {
            result.max_error_retry = '5'
        }
        // make sure that's a string value as it's expected by the client
        else {
            result.max_error_retry = result.max_error_retry.toString()
        }

        return result
    }

    /**
     * Renders the given properties for logging, truncating to six characters the
     * `access_key`, `secret_key` and `session_token` values longer than that.
     */
    static private String dumpAwsConfig( Map<String,String> config ) {
        def result = new HashMap(config)
        if( config.access_key && config.access_key.size()>6 )
            result.access_key = "${config.access_key.substring(0,6)}.."

        if( config.secret_key && config.secret_key.size()>6 )
            result.secret_key = "${config.secret_key.substring(0,6)}.."

        if( config.session_token && config.session_token.size()>6 )
            result.session_token = "${config.session_token.substring(0,6)}.."

        return result.toString()
    }

    /**
     * Creates the AWS config from the `aws` entry of the given session config,
     * logging a warning and using empty options when the session config is null.
     */
    static private AwsConfig getConfig0(Map config) {
        if( config==null ) {
            log.warn("Missing nextflow session config object")
            return new AwsConfig(Collections.emptyMap())
        }
        new AwsConfig( (Map)config.aws ?: Collections.emptyMap() )
    }

    /**
     * @return the AWS config created from {@code Global.config}
     */
    static AwsConfig config() {
        getConfig0(Global.config)
    }
}
|
||||
@@ -0,0 +1,365 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import static nextflow.cloud.aws.util.AwsHelper.*
|
||||
|
||||
import software.amazon.awssdk.regions.Region
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.SysEnv
|
||||
import nextflow.config.spec.ConfigOption
|
||||
import nextflow.config.spec.ConfigScope
|
||||
import nextflow.script.dsl.Description
|
||||
import nextflow.file.FileHelper
|
||||
import nextflow.util.Duration
|
||||
import nextflow.util.MemoryUnit
|
||||
/**
|
||||
* Model AWS S3 config settings
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@Slf4j
|
||||
@CompileStatic
|
||||
class AwsS3Config implements ConfigScope {
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
Allow the access of public S3 buckets without providing AWS credentials (default: `false`). Any service that does not accept unsigned requests will return a service access error.
|
||||
""")
|
||||
final Boolean anonymous
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The amount of time to wait (in milliseconds) when initially establishing a connection before timing out (default: `10000`).
|
||||
""")
|
||||
final Integer connectionTimeout
|
||||
|
||||
final Boolean debug
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The AWS S3 API entry point e.g. `https://s3-us-west-1.amazonaws.com`. The endpoint must include the protocol prefix e.g. `https://`.
|
||||
""")
|
||||
final String endpoint
|
||||
|
||||
/**
|
||||
* Maximum number of concurrent transfers used by S3 transfer manager. By default,
|
||||
* it is determined automatically by `targetThroughputInGbps`.
|
||||
*/
|
||||
@ConfigOption
|
||||
final Integer maxConcurrency
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The maximum number of open HTTP connections used by the S3 client (default: `50`).
|
||||
""")
|
||||
final Integer maxConnections
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The maximum size for the heap memory buffer used by concurrent downloads. It must be at least 10 times the `minimumPartSize` (default:`400 MB`).
|
||||
""")
|
||||
final MemoryUnit maxDownloadHeapMemory
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The maximum number of retry attempts for failed retryable requests (default: `-1`).
|
||||
""")
|
||||
final Integer maxErrorRetry
|
||||
|
||||
/**
|
||||
* Maximum native memory used by S3 transfer manager. By default, it is
|
||||
* determined automatically by `targetThroughputInGbps`.
|
||||
*/
|
||||
@ConfigOption
|
||||
final MemoryUnit maxNativeMemory
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The minimum part size used for multipart uploads to S3 (default: `8 MB`).
|
||||
""")
|
||||
final MemoryUnit minimumPartSize
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The object size threshold used for multipart uploads to S3 (default: same as `aws.cllient.minimumPartSize`).
|
||||
""")
|
||||
final MemoryUnit multipartThreshold
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The proxy host to connect through.
|
||||
""")
|
||||
final String proxyHost
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The port to use when connecting through a proxy.
|
||||
""")
|
||||
final Integer proxyPort
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The protocol scheme to use when connecting through a proxy. Can be `http` or `https` (default: `'http'`).
|
||||
""")
|
||||
final String proxyScheme
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The user name to use when connecting through a proxy.
|
||||
""")
|
||||
final String proxyUsername
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The password to use when connecting through a proxy.
|
||||
""")
|
||||
final String proxyPassword
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
Use [Requester Pays](https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html) for S3 buckets (default: `false`).
|
||||
""")
|
||||
final Boolean requesterPays
|
||||
|
||||
@ConfigOption(types=[String])
|
||||
@Description("""
|
||||
Specify predefined bucket permissions, also known as [canned ACL](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl). Can be one of `Private`, `PublicRead`, `PublicReadWrite`, `AuthenticatedRead`, `LogDeliveryWrite`, `BucketOwnerRead`, `BucketOwnerFullControl`, or `AwsExecRead`.
|
||||
""")
|
||||
final ObjectCannedACL s3Acl
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
Use the path-based access model to access objects in S3-compatible storage systems (default: `false`).
|
||||
""")
|
||||
final Boolean s3PathStyleAccess
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The amount of time to wait (in milliseconds) for data to be transferred over an established, open connection before the connection is timed out (default: `30000`).
|
||||
""")
|
||||
final Integer socketTimeout
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The S3 storage class applied to stored objects, one of \\[`STANDARD`, `STANDARD_IA`, `ONEZONE_IA`, `INTELLIGENT_TIERING`\\] (default: `STANDARD`).
|
||||
""")
|
||||
final String storageClass
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The S3 server side encryption to be used when saving objects on S3. Can be `AES256` or `aws:kms` (default: none).
|
||||
""")
|
||||
final String storageEncryption
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The AWS KMS key Id to be used to encrypt files stored in the target S3 bucket.
|
||||
""")
|
||||
final String storageKmsKeyId
|
||||
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The target network throughput (in Gbps) used for S3 uploads and downloads (default: `10`).
|
||||
""")
|
||||
final Double targetThroughputInGbps
|
||||
|
||||
// deprecated
|
||||
|
||||
@Deprecated
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The size of a single part in a multipart upload (default: `100 MB`).
|
||||
""")
|
||||
final MemoryUnit uploadChunkSize
|
||||
|
||||
@Deprecated
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The maximum number of upload attempts after which a multipart upload returns an error (default: `5`).
|
||||
""")
|
||||
final Integer uploadMaxAttempts
|
||||
|
||||
@Deprecated
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The maximum number of threads used for multipart upload (default: `10`).
|
||||
""")
|
||||
final Integer uploadMaxThreads
|
||||
|
||||
@Deprecated
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The time to wait after a failed upload attempt to retry the part upload (default: `500ms`).
|
||||
""")
|
||||
final Duration uploadRetrySleep
|
||||
|
||||
@Deprecated
|
||||
@ConfigOption
|
||||
@Description("""
|
||||
The S3 storage class applied to stored objects. Can be `STANDARD`, `STANDARD_IA`, `ONEZONE_IA`, or `INTELLIGENT_TIERING` (default: `STANDARD`).
|
||||
""")
|
||||
final String uploadStorageClass
|
||||
|
||||
private static final long _1MB = 1024 * 1024;
|
||||
// According to CRT Async client docs https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3CrtAsyncClientBuilder.html
|
||||
public static final long DEFAULT_PART_SIZE = 8 * _1MB;
|
||||
public static final int DEFAULT_INIT_BUFFER_PARTS = 10;
|
||||
// Maximum heap buffer size
|
||||
public static final long DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE = 400 * _1MB;
|
||||
|
||||
/**
 * Populate the S3 client settings from the given options map.
 *
 * @param opts the option name/value pairs; missing entries leave the matching field {@code null}
 * @throws IllegalArgumentException when the endpoint does not use the http/https protocol,
 *      or when the download buffer options fail {@link #checkDownloadBufferParams()}
 */
AwsS3Config(Map opts) {
    // -- general client options
    this.anonymous = opts.anonymous as Boolean
    this.connectionTimeout = opts.connectionTimeout as Integer
    this.debug = opts.debug as Boolean

    // -- endpoint: explicit option first, then the AWS_S3_ENDPOINT environment variable
    this.endpoint = opts.endpoint ?: SysEnv.get('AWS_S3_ENDPOINT')
    if( endpoint ) {
        final protocol = FileHelper.getUrlProtocol(endpoint)
        if( !(protocol in ['http','https']) )
            throw new IllegalArgumentException("S3 endpoint must begin with http:// or https:// prefix - offending value: '${endpoint}'")
    }

    // -- concurrency, memory and retry options
    this.maxConcurrency = opts.maxConcurrency as Integer
    this.maxConnections = opts.maxConnections as Integer
    this.maxDownloadHeapMemory = opts.maxDownloadHeapMemory as MemoryUnit
    this.maxErrorRetry = opts.maxErrorRetry as Integer
    this.maxNativeMemory = opts.maxNativeMemory as MemoryUnit
    this.minimumPartSize = opts.minimumPartSize as MemoryUnit
    this.multipartThreshold = opts.multipartThreshold as MemoryUnit

    // -- proxy options
    this.proxyHost = opts.proxyHost
    this.proxyPort = opts.proxyPort as Integer
    this.proxyScheme = opts.proxyScheme
    this.proxyUsername = opts.proxyUsername
    this.proxyPassword = opts.proxyPassword

    // -- access and storage options
    this.requesterPays = opts.requesterPays as Boolean
    this.s3Acl = parseS3Acl(opts.s3Acl as String)
    this.s3PathStyleAccess = opts.s3PathStyleAccess as Boolean
    this.socketTimeout = opts.socketTimeout as Integer
    // 'uploadStorageClass' is kept for legacy purposes
    final legacyAwareStorageClass = (opts.storageClass ?: opts.uploadStorageClass) as String
    this.storageClass = parseStorageClass(legacyAwareStorageClass)
    this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String)
    this.storageKmsKeyId = opts.storageKmsKeyId
    this.targetThroughputInGbps = opts.targetThroughputInGbps as Double

    // -- deprecated upload options
    this.uploadChunkSize = opts.uploadChunkSize as MemoryUnit
    this.uploadMaxAttempts = opts.uploadMaxAttempts as Integer
    this.uploadMaxThreads = opts.uploadMaxThreads as Integer
    this.uploadRetrySleep = opts.uploadRetrySleep as Duration

    checkDownloadBufferParams()
}
|
||||
|
||||
/**
 * Validate the storage class option.
 *
 * @param value the configured storage class, possibly {@code null}
 * @return the value itself when it is {@code null} or one of the accepted names,
 *      otherwise {@code null} after logging a warning
 */
private String parseStorageClass(String value) {
    final accepted = [null, 'STANDARD', 'STANDARD_IA', 'ONEZONE_IA', 'INTELLIGENT_TIERING', 'REDUCED_REDUNDANCY' ]
    if( !(value in accepted) ) {
        log.warn "Unsupported AWS storage-class: $value"
        return null
    }
    // still accepted, but the user is warned about it
    if( value == 'REDUCED_REDUNDANCY' ) {
        log.warn "AWS S3 Storage Class `REDUCED_REDUNDANCY` is deprecated (and more expensive than `STANDARD`). For cost savings, look to `STANDARD_IA`, `ONEZONE_IA`, `INTELLIGENT_TIERING`."
    }
    return value
}
|
||||
|
||||
/**
 * Validate the storage encryption option.
 *
 * @param value the configured encryption name, possibly {@code null}
 * @return the value when it is {@code null}, {@code AES256} or {@code aws:kms};
 *      otherwise {@code null} after logging a warning
 */
private String parseStorageEncryption(String value) {
    final boolean supported = value in [null,'AES256','aws:kms']
    if( !supported ) {
        log.warn "Unsupported AWS storage-encryption: $value"
        return null
    }
    return value
}
|
||||
|
||||
// ==== getters =====
|
||||
|
||||
/**
 * @return the value of the {@code s3PathStyleAccess} option, possibly {@code null}
 */
Boolean getPathStyleAccess() {
    this.s3PathStyleAccess
}
|
||||
|
||||
/**
 * @return {@code true} when an endpoint is set and it does not end with {@code .amazonaws.com}
 */
boolean isCustomEndpoint() {
    if( !endpoint )
        return false
    return !endpoint.endsWith(".amazonaws.com")
}
|
||||
|
||||
/**
 * Extract the region from endpoints shaped like https://xxx.<region>.amazonaws.com
 *
 * @returns The region found in the endpoint host name. Null when no endpoint or a custom
 * endpoint is defined, when the host has fewer than three dot-separated parts, when the
 * candidate is not a known region (global or multi-region access point), or when the
 * endpoint cannot be parsed.
 */
String getEndpointRegion(){
    if( !endpoint || isCustomEndpoint() )
        return null

    try {
        final String hostName = URI.create(endpoint).getHost()
        final String[] labels = hostName.split('\\.')
        // the region is the third label counting from the right
        final int regionIndex = labels.length - 3
        if( regionIndex < 0 ) {
            log.debug("Region subdomain doesn't exist in endpoint '${endpoint}'")
            return null
        }
        final String candidate = labels[regionIndex]
        final boolean known = Region.regions().contains(Region.of(candidate))
        if( !known ) {
            log.debug("Region '${candidate}' extracted from endpoint '${endpoint}' is not valid")
            return null
        }
        return candidate
    }
    catch( Exception e ) {
        log.debug("Exception getting region from endpoint: '${endpoint}' - ${e.message}")
        return null
    }
}
|
||||
|
||||
/**
 * Render the settings as string key/value pairs using snake_case keys.
 * Memory values are rendered in bytes and the retry sleep in milliseconds.
 *
 * @return a map holding only the settings whose value is not {@code null}
 */
Map<String,String> getAwsClientConfig() {
    final settings = [
        connection_timeout: connectionTimeout?.toString(),
        max_concurrency: maxConcurrency?.toString(),
        max_connections: maxConnections?.toString(),
        max_download_heap_memory: maxDownloadHeapMemory?.toBytes()?.toString(),
        max_error_retry: maxErrorRetry?.toString(),
        max_native_memory: maxNativeMemory?.toBytes()?.toString(),
        minimum_part_size: minimumPartSize?.toBytes()?.toString(),
        multipart_threshold: multipartThreshold?.toBytes()?.toString(),
        proxy_host: proxyHost?.toString(),
        proxy_port: proxyPort?.toString(),
        proxy_scheme: proxyScheme?.toString(),
        proxy_username: proxyUsername?.toString(),
        proxy_password: proxyPassword?.toString(),
        requester_pays: requesterPays?.toString(),
        s3_acl: s3Acl?.toString(),
        socket_timeout: socketTimeout?.toString(),
        storage_encryption: storageEncryption?.toString(),
        storage_kms_key_id: storageKmsKeyId?.toString(),
        target_throughput_in_gbps: targetThroughputInGbps?.toString(),
        upload_chunk_size: uploadChunkSize?.toBytes()?.toString(),
        upload_max_attempts: uploadMaxAttempts?.toString(),
        upload_max_threads: uploadMaxThreads?.toString(),
        upload_retry_sleep: uploadRetrySleep?.toMillis()?.toString(),
        // the legacy key carries the current `storageClass` value
        upload_storage_class: storageClass?.toString()
    ]
    // drop the options that were not set
    return settings.findAll { name, text -> text != null }
}
|
||||
|
||||
/**
 * Validate the download buffer options.
 *
 * @throws IllegalArgumentException when `maxDownloadHeapMemory` or `minimumPartSize` is zero,
 *      or when the heap buffer is smaller than {@code DEFAULT_INIT_BUFFER_PARTS} parts
 */
void checkDownloadBufferParams() {
    if( maxDownloadHeapMemory?.toBytes() == 0L )
        throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` can't be 0")

    if( minimumPartSize?.toBytes() == 0L )
        throw new IllegalArgumentException("Configuration option `aws.client.minimumPartSize` can't be 0")

    // nothing to cross-check when both options are left unset
    if( maxDownloadHeapMemory == null && minimumPartSize == null )
        return

    // an unset option falls back to its default before comparing
    final maxBuffer = maxDownloadHeapMemory ? maxDownloadHeapMemory.toBytes() : DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE
    final partSize = minimumPartSize ? minimumPartSize.toBytes() : DEFAULT_PART_SIZE
    final minRequired = DEFAULT_INIT_BUFFER_PARTS * partSize
    if( maxBuffer < minRequired )
        throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` must be at least " + DEFAULT_INIT_BUFFER_PARTS + " times `aws.client.minimumPartSize`")
}
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.fusion
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.fusion.FusionConfig
|
||||
import nextflow.fusion.FusionEnv
|
||||
import org.pf4j.Extension
|
||||
/**
 * Implements {@link FusionEnv} for AWS cloud
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Extension
@CompileStatic
class AwsFusionEnv implements FusionEnv {

    /**
     * Build the environment variables for the given storage scheme.
     *
     * @param scheme the storage scheme; anything other than {@code s3} yields an empty map
     * @param config the Fusion configuration, consulted to decide whether credentials are exported
     * @return the environment variable names and values
     */
    @Override
    Map<String, String> getEnvironment(String scheme, FusionConfig config) {
        if( scheme!='s3' )
            return Collections.<String,String>emptyMap()

        final env = new HashMap<String,String>()
        final awsConfig = AwsConfig.config()
        final s3 = awsConfig.s3Config
        final endpoint = s3.endpoint
        final creds = config.exportStorageCredentials() ? awsCreds(awsConfig) : List.<String>of()

        // credentials: access key, secret key and optionally the session token
        if( creds ) {
            env.put('AWS_ACCESS_KEY_ID', creds[0])
            env.put('AWS_SECRET_ACCESS_KEY', creds[1])
            if( creds.size() > 2 )
                env.put('AWS_SESSION_TOKEN', creds[2])
        }
        if( endpoint )
            env.put('AWS_S3_ENDPOINT', endpoint)
        // the region is exported only together with a custom endpoint
        if( awsConfig.region && s3.isCustomEndpoint() )
            env.put('FUSION_AWS_REGION', awsConfig.region)
        if( s3.storageEncryption )
            env.put('FUSION_AWS_SERVER_SIDE_ENCRYPTION', s3.storageEncryption)
        if( s3.storageKmsKeyId )
            env.put('FUSION_AWS_SSEKMS_KEY_ID', s3.storageKmsKeyId)
        return env
    }

    /**
     * Resolve the credentials, preferring the ones held by the AWS config and
     * falling back to the {@code AWS_*} environment variables.
     *
     * @param awsConfig the AWS configuration
     * @return access key and secret key, followed by the session token when available;
     *      an empty list when no access/secret key pair is found
     */
    protected List<String> awsCreds(AwsConfig awsConfig) {
        final configured = awsConfig.getCredentials()
        if( configured )
            return configured

        final accessKey = SysEnv.get('AWS_ACCESS_KEY_ID')
        final secretKey = SysEnv.get('AWS_SECRET_ACCESS_KEY')
        if( !accessKey || !secretKey )
            return List.<String>of()

        final sessionToken = SysEnv.get('AWS_SESSION_TOKEN')
        return sessionToken
            ? List.<String>of(accessKey, secretKey, sessionToken)
            : List.<String>of(accessKey, secretKey)
    }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.mail
|
||||
|
||||
import javax.mail.internet.MimeMessage
|
||||
import software.amazon.awssdk.core.SdkBytes
|
||||
import software.amazon.awssdk.services.ses.SesClient
|
||||
import software.amazon.awssdk.services.ses.model.RawMessage
|
||||
import software.amazon.awssdk.services.ses.model.SendRawEmailRequest
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.mail.MailProvider
|
||||
import nextflow.mail.Mailer
|
||||
|
||||
/**
 * Send a mime message via AWS SES raw API
 *
 * https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
@Slf4j
class AwsMailProvider implements MailProvider {

    /**
     * @return the provider name, {@code aws-ses}
     */
    @Override
    String name() {
        return 'aws-ses'
    }

    /**
     * @return {@code false}, i.e. this provider is not restricted to text-only messages
     */
    @Override
    boolean textOnly() {
        return false
    }

    /**
     * Serialize the message and submit it through the SES raw email API.
     * The client obtained from {@link #getEmailClient()} is closed once the
     * request completes, whether it succeeds or fails, so that a client is
     * not leaked on every message sent.
     *
     * @param message the mime message to send
     * @param mailer the mailer requesting the delivery (not used by this provider)
     */
    @Override
    void send(MimeMessage message, Mailer mailer) {
        final client = getEmailClient()
        try {
            // dump the message to a buffer
            final outputStream = new ByteArrayOutputStream()
            message.writeTo(outputStream)
            // send the email
            final rawMessage = RawMessage.builder().data(SdkBytes.fromByteArray(outputStream.toByteArray())).build()
            final result = client.sendRawEmail(SendRawEmailRequest.builder().rawMessage(rawMessage).build())
            log.debug "Mail message sent: ${result}"
        }
        finally {
            // a new client is created for each message, release it when done
            client.close()
        }
    }

    /**
     * @return a newly built SES client using the builder defaults
     */
    SesClient getEmailClient() {
        return SesClient.builder().build()
    }

}
|
||||
@@ -0,0 +1,644 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.nio.file.*;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import nextflow.cloud.aws.nio.util.ExtendedS3TransferManager;
|
||||
import nextflow.cloud.aws.nio.util.S3SyncClientConfiguration;
|
||||
import nextflow.extension.FilesEx;
|
||||
import nextflow.cloud.aws.AwsClientFactory;
|
||||
import nextflow.cloud.aws.nio.util.S3AsyncClientConfiguration;
|
||||
import nextflow.cloud.aws.util.AwsHelper;
|
||||
import nextflow.util.ThreadPoolManager;
|
||||
import nextflow.util.Threads;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import software.amazon.awssdk.awscore.exception.AwsServiceException;
|
||||
import software.amazon.awssdk.core.ResponseInputStream;
|
||||
import software.amazon.awssdk.core.exception.SdkException;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.model.*;
|
||||
import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;
|
||||
import software.amazon.awssdk.transfer.s3.S3TransferManager;
|
||||
import software.amazon.awssdk.transfer.s3.model.*;
|
||||
|
||||
/**
|
||||
* Client Amazon S3
|
||||
* @see software.amazon.awssdk.services.s3.S3Client
|
||||
*/
|
||||
public class S3Client {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(S3Client.class);
|
||||
|
||||
private software.amazon.awssdk.services.s3.S3Client client;
|
||||
|
||||
// Semaphore to limit concurrent client connections when using virtual threads.
|
||||
private Semaphore semaphore;
|
||||
|
||||
private ObjectCannedACL cannedAcl;
|
||||
|
||||
private String kmsKeyId;
|
||||
|
||||
private ServerSideEncryption storageEncryption;
|
||||
|
||||
private ExtendedS3TransferManager transferManager;
|
||||
|
||||
private ExecutorService transferPool;
|
||||
|
||||
private Integer transferManagerThreads = 10;
|
||||
|
||||
private Boolean isRequesterPaysEnabled = false;
|
||||
|
||||
private String callerAccount;
|
||||
|
||||
private AwsClientFactory factory;
|
||||
|
||||
private Properties props;
|
||||
|
||||
private boolean global;
|
||||
|
||||
public S3Client(AwsClientFactory factory, Properties props, boolean global) {
|
||||
S3SyncClientConfiguration clientConfig = S3SyncClientConfiguration.create(props);
|
||||
this.factory = factory;
|
||||
this.props = props;
|
||||
this.global = global;
|
||||
this.client = factory.getS3Client(clientConfig, global);
|
||||
this.semaphore = Threads.useVirtual() ? new Semaphore(clientConfig.getMaxConnections()) : null;
|
||||
this.callerAccount = fetchCallerAccount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an action that requires the S3 semaphore to limit concurrent connections.
|
||||
*
|
||||
* @param action
|
||||
*/
|
||||
private <T> T runWithPermit(Supplier<T> action) {
|
||||
try {
|
||||
if (semaphore != null) semaphore.acquire();
|
||||
try {
|
||||
return action.get();
|
||||
} finally {
|
||||
if (semaphore != null) semaphore.release();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new RuntimeException("Interrupted while acquiring S3 client semaphore", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* AmazonS3Client#getS3AccountOwner() is not available in SDK v2.
|
||||
* The STSClient#getCallerIdentity returns the account, but it does not include the canonical ID required for ACLs.
|
||||
*
|
||||
* This function and the fetchCallerAccount() emulate the old behavior retrieving the canonicalId can only be
|
||||
* retrieved if the user owns a bucket.
|
||||
*/
|
||||
public String getCallerAccount() {
|
||||
return callerAccount;
|
||||
}
|
||||
|
||||
private String fetchCallerAccount() {
|
||||
try {
|
||||
List<Bucket> buckets = runWithPermit(() -> client.listBuckets(ListBucketsRequest.builder().maxBuckets(1).build()).buckets());
|
||||
if (buckets == null || buckets.isEmpty())
|
||||
return null;
|
||||
return getBucketAcl(buckets.get(0).name()).owner().id();
|
||||
} catch (Throwable e) {
|
||||
log.debug("Unable to fetch caller account - {} ", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#listBuckets()
|
||||
*/
|
||||
public List<Bucket> listBuckets() throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.listBuckets().buckets());
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "listBuckets", null, null);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#listObjects(ListObjectsRequest)
|
||||
*/
|
||||
public ListObjectsResponse listObjects(ListObjectsRequest request) throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.listObjects(request));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "listObject", request.bucket(), request.prefix());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an AWS SDK exception into the most appropriate {@link IOException} subtype
|
||||
* so callers can handle it via standard NIO semantics.
|
||||
*
|
||||
* The original {@code SdkException} is always attached as the cause for diagnostics.
|
||||
*/
|
||||
// package-private for testing
|
||||
static IOException convertAwsException(SdkException e, String method, String bucket, String key) {
|
||||
final String s3path = (key != null && !key.isEmpty())
|
||||
? "s3://" + bucket + "/" + key
|
||||
: "s3://" + (bucket != null ? bucket : "");
|
||||
final String message = String.format("Exception calling %s for %s", method, s3path);
|
||||
|
||||
if (e instanceof NoSuchBucketException || e instanceof NoSuchKeyException) {
|
||||
final NoSuchFileException nsfe = new NoSuchFileException(s3path);
|
||||
nsfe.initCause(e);
|
||||
return nsfe;
|
||||
}
|
||||
|
||||
if (e instanceof AwsServiceException) {
|
||||
final int code = ((AwsServiceException) e).statusCode();
|
||||
if (code == 404) {
|
||||
final NoSuchFileException nsfe = new NoSuchFileException(s3path);
|
||||
nsfe.initCause(e);
|
||||
return nsfe;
|
||||
}
|
||||
if (code == 401 || code == 403) {
|
||||
final AccessDeniedException ade = new AccessDeniedException(s3path, null, e.getMessage());
|
||||
ade.initCause(e);
|
||||
return ade;
|
||||
}
|
||||
}
|
||||
|
||||
return new IOException(message, e);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#getObject
|
||||
*/
|
||||
public ResponseInputStream<GetObjectResponse> getObject(String bucketName, String key) throws IOException {
|
||||
GetObjectRequest.Builder reqBuilder = GetObjectRequest.builder().bucket(bucketName).key(key);
|
||||
if( this.isRequesterPaysEnabled )
|
||||
reqBuilder.requestPayer(RequestPayer.REQUESTER);
|
||||
try {
|
||||
return runWithPermit(() -> client.getObject(reqBuilder.build()));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getObject", bucketName, key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#putObject
|
||||
*/
|
||||
public PutObjectResponse putObject(String bucket, String key, File file) throws IOException {
|
||||
PutObjectRequest.Builder builder = PutObjectRequest.builder().bucket(bucket).key(key);
|
||||
if( cannedAcl != null ) {
|
||||
log.trace("Setting canned ACL={}; bucket={}; key={}", cannedAcl, bucket, key);
|
||||
builder.acl(cannedAcl);
|
||||
}
|
||||
try {
|
||||
return runWithPermit(() -> client.putObject(builder.build(), file.toPath()));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "putObject", bucket, key);
|
||||
}
|
||||
}
|
||||
|
||||
private PutObjectRequest preparePutObjectRequest(PutObjectRequest.Builder reqBuilder, List<Tag> tags, String contentType, String storageClass) {
|
||||
if( cannedAcl != null ) {
|
||||
reqBuilder.acl(cannedAcl);
|
||||
}
|
||||
if( tags != null && !tags.isEmpty()) {
|
||||
reqBuilder.tagging(Tagging.builder().tagSet(tags).build());
|
||||
}
|
||||
if( kmsKeyId != null ) {
|
||||
reqBuilder.ssekmsKeyId(kmsKeyId);
|
||||
}
|
||||
if( storageEncryption!=null ) {
|
||||
reqBuilder.serverSideEncryption(storageEncryption);
|
||||
}
|
||||
if( contentType!=null ) {
|
||||
reqBuilder.contentType(contentType);
|
||||
}
|
||||
if( storageClass!=null ) {
|
||||
reqBuilder.storageClass(storageClass);
|
||||
}
|
||||
return reqBuilder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#putObject
|
||||
*/
|
||||
public PutObjectResponse putObject(String bucket, String keyName, InputStream inputStream, List<Tag> tags, String contentType, long contentLength) throws IOException {
|
||||
PutObjectRequest.Builder reqBuilder = PutObjectRequest.builder()
|
||||
.bucket(bucket)
|
||||
.key(keyName);
|
||||
if( cannedAcl != null ) {
|
||||
reqBuilder.acl(cannedAcl);
|
||||
}
|
||||
if( tags != null && !tags.isEmpty()) {
|
||||
reqBuilder.tagging(Tagging.builder().tagSet(tags).build());
|
||||
}
|
||||
if( kmsKeyId != null ) {
|
||||
reqBuilder.ssekmsKeyId(kmsKeyId);
|
||||
}
|
||||
if( storageEncryption!=null ) {
|
||||
reqBuilder.serverSideEncryption(storageEncryption);
|
||||
}
|
||||
if( contentType!=null ) {
|
||||
reqBuilder.contentType(contentType);
|
||||
}
|
||||
PutObjectRequest req = reqBuilder.build();
|
||||
if( log.isTraceEnabled() ) {
|
||||
log.trace("S3 PutObject request {}", req);
|
||||
}
|
||||
try {
|
||||
return runWithPermit(() -> client.putObject(req, RequestBody.fromInputStream(inputStream, contentLength)));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "putObject", bucket, keyName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#deleteObject
|
||||
*/
|
||||
public void deleteObject(String bucket, String key) throws IOException {
|
||||
try {
|
||||
runWithPermit(() -> client.deleteObject(DeleteObjectRequest.builder().bucket(bucket).key(key).build()));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "deleteObject", bucket, key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#getBucketAcl
|
||||
*/
|
||||
public AccessControlPolicy getBucketAcl(String bucket) throws IOException {
|
||||
try {
|
||||
GetBucketAclResponse response = runWithPermit(() -> client.getBucketAcl(GetBucketAclRequest.builder().bucket(bucket).build()));
|
||||
return AccessControlPolicy.builder().grants(response.grants()).owner(response.owner()).build();
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getBucketAcl", bucket, null);
|
||||
}
|
||||
}
|
||||
|
||||
public void setCannedAcl(String acl) {
|
||||
if( acl==null )
|
||||
return;
|
||||
this.cannedAcl = AwsHelper.parseS3Acl(acl);
|
||||
log.debug("Setting S3 canned ACL={} [{}]", this.cannedAcl, acl);
|
||||
}
|
||||
|
||||
public void setKmsKeyId(String kmsKeyId) {
|
||||
if( kmsKeyId==null )
|
||||
return;
|
||||
this.kmsKeyId = kmsKeyId;
|
||||
log.debug("Setting S3 SSE kms Id={}", kmsKeyId);
|
||||
}
|
||||
|
||||
public void setStorageEncryption(String alg) {
|
||||
if( alg == null )
|
||||
return;
|
||||
this.storageEncryption = ServerSideEncryption.fromValue(alg);
|
||||
log.debug("Setting S3 SSE storage encryption algorithm={}", alg);
|
||||
}
|
||||
|
||||
public void setRequesterPaysEnabled(String requesterPaysEnabled) {
|
||||
if( requesterPaysEnabled == null )
|
||||
return;
|
||||
this.isRequesterPaysEnabled = Boolean.valueOf(requesterPaysEnabled);
|
||||
log.debug("Setting S3 requester pays enabled={}", isRequesterPaysEnabled);
|
||||
}
|
||||
|
||||
public ObjectCannedACL getCannedAcl() {
|
||||
return cannedAcl;
|
||||
}
|
||||
|
||||
public software.amazon.awssdk.services.s3.S3Client getClient() {
|
||||
return client;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#getObjectAcl
|
||||
*/
|
||||
public AccessControlPolicy getObjectAcl(String bucketName, String key) throws IOException {
|
||||
try {
|
||||
GetObjectAclResponse response = runWithPermit(() -> client.getObjectAcl(GetObjectAclRequest.builder().bucket(bucketName).key(key).build()));
|
||||
return AccessControlPolicy.builder().grants(response.grants()).owner(response.owner()).build();
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getObjectAcl", bucketName, key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#headObject
|
||||
*/
|
||||
public HeadObjectResponse getObjectMetadata(String bucketName, String key) throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.headObject(HeadObjectRequest.builder().bucket(bucketName).key(key).build()));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getObjectMetadata", bucketName, key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#headBucket
|
||||
*/
|
||||
public HeadBucketResponse getBucketMetadata(String bucketName) throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getBucketMetadata", bucketName, null);
|
||||
}
|
||||
}
|
||||
|
||||
public List<Tag> getObjectTags(String bucketName, String key) throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.getObjectTagging(GetObjectTaggingRequest.builder().bucket(bucketName).key(key).build()).tagSet());
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "getObjectTags", bucketName, key);
|
||||
}
|
||||
}
|
||||
|
||||
public String getObjectKmsKeyId(String bucketName, String key) throws IOException {
|
||||
return getObjectMetadata(bucketName, key).ssekmsKeyId();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see software.amazon.awssdk.services.s3.S3Client#listObjectsV2Paginator
|
||||
*/
|
||||
public ListObjectsV2Iterable listObjectsV2Paginator(ListObjectsV2Request request) throws IOException {
|
||||
try {
|
||||
return runWithPermit(() -> client.listObjectsV2Paginator(request));
|
||||
} catch (SdkException e) {
|
||||
throw convertAwsException(e, "listObjects", request.bucket(), request.prefix());
|
||||
}
|
||||
}
|
||||
|
||||
// ===== transfer manager section =====
|
||||
|
||||
synchronized ExtendedS3TransferManager transferManager() {
|
||||
if( transferManager == null ) {
|
||||
transferPool = ThreadPoolManager.create("S3TransferManager");
|
||||
var delegate = S3TransferManager.builder()
|
||||
.s3Client(factory.getS3AsyncClient(S3AsyncClientConfiguration.create(props), global))
|
||||
.executor(transferPool)
|
||||
.build();
|
||||
transferManager = new ExtendedS3TransferManager(delegate, props);
|
||||
|
||||
}
|
||||
return transferManager;
|
||||
}
|
||||
|
||||
public void downloadFile(S3Path source, File target, long size) throws IOException {
|
||||
try {
|
||||
DownloadFileRequest downloadFileRequest = DownloadFileRequest.builder()
|
||||
.getObjectRequest(b -> b.bucket(source.getBucket()).key(source.getKey()))
|
||||
.destination(target)
|
||||
.build();
|
||||
transferManager().downloadFile(downloadFileRequest,size).completionFuture().get();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new InterruptedIOException(String.format("S3 download file: s3://%s/%s cancelled", source.getBucket(), source.getKey()));
|
||||
} catch (ExecutionException e) {
|
||||
String msg = String.format("Exception thrown downloading S3 object s3://%s/%s", source.getBucket(), source.getKey());
|
||||
throw new IOException(msg, e.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
private static void createDirectory(Path dir) throws IOException {
|
||||
try {
|
||||
Files.createDirectory(dir);
|
||||
} catch (FileAlreadyExistsException e) {
|
||||
log.trace("File already exists: " + dir);
|
||||
}
|
||||
}
|
||||
|
||||
public void downloadDirectory(S3Path source, File targetFile) throws IOException {
|
||||
//
|
||||
// the download directory method provided by the TransferManager replicates
|
||||
// the source files directory structure in the target path
|
||||
// see https://github.com/aws/aws-sdk-java/issues/1321
|
||||
//
|
||||
// just traverse to source path a copy all files
|
||||
//
|
||||
final Path target = targetFile.toPath();
|
||||
final Queue<OngoingFileDownload> allDownloads = new LinkedList<>();
|
||||
final InterruptedIOException[] traversalInterruption = {null};
|
||||
|
||||
FileVisitor<Path> visitor = new SimpleFileVisitor<Path>() {
|
||||
|
||||
public FileVisitResult preVisitDirectory(Path current, BasicFileAttributes attr) throws IOException {
|
||||
// get the *delta* path against the source path
|
||||
final Path rel = source.relativize(current);
|
||||
final String delta = rel != null ? rel.toString() : null;
|
||||
final Path newFolder = delta != null ? target.resolve(delta) : target;
|
||||
if(log.isTraceEnabled())
|
||||
log.trace("Download DIR: " + current + " -> " + newFolder);
|
||||
// this `copy` creates the new folder, but does not copy the contained files
|
||||
createDirectory(newFolder);
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path current, BasicFileAttributes attr) throws IOException {
|
||||
// get the *delta* path against the source path
|
||||
final Path rel = source.relativize(current);
|
||||
final String delta = rel != null ? rel.toString() : null;
|
||||
final Path newFile = delta != null ? target.resolve(delta) : target;
|
||||
if( log.isTraceEnabled())
|
||||
log.trace("Download file: " + current + " -> "+ FilesEx.toUriString(newFile));
|
||||
try {
|
||||
S3Path s3Path = (S3Path)current;
|
||||
DownloadFileRequest downloadFileRequest = DownloadFileRequest.builder()
|
||||
.getObjectRequest(b -> b.bucket(s3Path.getBucket()).key(s3Path.getKey()))
|
||||
.destination(newFile)
|
||||
.build();
|
||||
FileDownload it = transferManager().downloadFile(downloadFileRequest, attr.size());
|
||||
allDownloads.add(new OngoingFileDownload(s3Path.getBucket(), s3Path.getKey(), it));
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
// Don't throw immediately - store the exception and continue to clean-up
|
||||
traversalInterruption[0] = new InterruptedIOException(String.format("S3 download directory: s3://%s/%s interrupted", source.getBucket(), source.getKey()));
|
||||
return FileVisitResult.TERMINATE;
|
||||
}
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
Files.walkFileTree(source, EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, visitor);
|
||||
} finally {
|
||||
cleanupQueuedDownloads(allDownloads, traversalInterruption[0], source);
|
||||
}
|
||||
}
|
||||
|
||||
private void cleanupQueuedDownloads(Queue<OngoingFileDownload> allDownloads, InterruptedIOException traversalInterruption, S3Path source) throws IOException {
|
||||
try {
|
||||
IOException firstException = null;
|
||||
while(!allDownloads.isEmpty()) {
|
||||
OngoingFileDownload current = allDownloads.poll();
|
||||
try {
|
||||
current.download.completionFuture().get();
|
||||
} catch (ExecutionException e) {
|
||||
Throwable cause = e.getCause();
|
||||
log.debug("Exception thrown downloading S3 object s3://{}/{}", current.bucket, current.key, cause);
|
||||
if (firstException == null) {
|
||||
firstException = new IOException(String.format("Transfer failed for s3://%s/%s", current.bucket, current.key), cause);
|
||||
} else {
|
||||
firstException.addSuppressed(cause);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Throw traversal interruption first if it occurred
|
||||
if (traversalInterruption != null) {
|
||||
if (firstException != null) {
|
||||
traversalInterruption.addSuppressed(firstException);
|
||||
}
|
||||
throw traversalInterruption;
|
||||
}
|
||||
|
||||
// Throw download failures if any occurred
|
||||
if (firstException != null) {
|
||||
throw new IOException(String.format("Some transfers from S3 download directory: s3://%s/%s failed", source.getBucket(), source.getKey()), firstException);
|
||||
}
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
InterruptedIOException interruptedException = new InterruptedIOException(String.format("Interrupted while download directory s3://%s/%s", source.getBucket(), source.getKey()));
|
||||
if (traversalInterruption != null) {
|
||||
interruptedException.addSuppressed(traversalInterruption);
|
||||
}
|
||||
throw interruptedException;
|
||||
}
|
||||
}
|
||||
|
||||
public void uploadFile(File source, S3Path target) throws IOException {
|
||||
var req = PutObjectRequest.builder().bucket(target.getBucket()).key(target.getKey());
|
||||
preparePutObjectRequest(req, target.getTagsList(), target.getContentType(), target.getStorageClass());
|
||||
var uploadRequest = UploadFileRequest.builder().putObjectRequest(req.build()).source(source).build();
|
||||
try {
|
||||
transferManager().uploadFile(uploadRequest).completionFuture().get();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new InterruptedIOException(String.format("S3 upload file: s3://%s/%s interrupted", target.getBucket(), target.getKey()));
|
||||
} catch (ExecutionException e) {
|
||||
String msg = String.format("Exception thrown uploading S3 object s3://%s/%s", target.getBucket(), target.getKey());
|
||||
throw new IOException(msg, e.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
private Consumer<UploadFileRequest.Builder> transformUploadRequest(List<Tag> tags) {
|
||||
return builder -> builder.putObjectRequest(updateBuilder(builder.build().putObjectRequest().toBuilder(), tags).build());
|
||||
}
|
||||
|
||||
private PutObjectRequest.Builder updateBuilder(PutObjectRequest.Builder porBuilder, List<Tag> tags) {
|
||||
|
||||
if( cannedAcl != null )
|
||||
porBuilder.acl(cannedAcl);
|
||||
if( storageEncryption != null )
|
||||
porBuilder.serverSideEncryption(storageEncryption);
|
||||
if( kmsKeyId != null )
|
||||
porBuilder.ssekmsKeyId(kmsKeyId);
|
||||
if( tags != null && !tags.isEmpty() )
|
||||
porBuilder.tagging(Tagging.builder().tagSet(tags).build());
|
||||
return porBuilder;
|
||||
}
|
||||
|
||||
public void uploadDirectory(File source, S3Path target) throws IOException {
|
||||
UploadDirectoryRequest request = UploadDirectoryRequest.builder()
|
||||
.bucket(target.getBucket())
|
||||
.s3Prefix(target.getKey())
|
||||
.source(source.toPath())
|
||||
.uploadFileRequestTransformer(transformUploadRequest(target.getTagsList()))
|
||||
.build();
|
||||
|
||||
try {
|
||||
CompletedDirectoryUpload completed = transferManager().uploadDirectory(request).completionFuture().get();
|
||||
if (!completed.failedTransfers().isEmpty()) {
|
||||
log.debug("S3 upload directory: s3://{}/{} failed transfers", target.getBucket(), target.getKey());
|
||||
throw new IOException("Some transfers in S3 upload directory: s3://"+ target.getBucket() +"/"+ target.getKey() +" has failed - Transfers: " + completed.failedTransfers() );
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new InterruptedIOException(String.format("S3 upload directory: s3://%s/%s interrupted", target.getBucket(), target.getKey()));
|
||||
} catch (ExecutionException e) {
|
||||
String msg = String.format("Exception thrown uploading S3 object s3://%s/%s", target.getBucket(), target.getKey());
|
||||
throw new IOException(msg, e.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
public void copyFile(CopyObjectRequest.Builder reqBuilder, List<Tag> tags, String contentType, String storageClass) throws IOException {
|
||||
if( tags !=null && !tags.isEmpty()) {
|
||||
log.debug("Setting tags: {}", tags);
|
||||
reqBuilder.taggingDirective(TaggingDirective.REPLACE);
|
||||
reqBuilder.tagging(Tagging.builder().tagSet(tags).build());
|
||||
}
|
||||
if( cannedAcl != null ) {
|
||||
reqBuilder.acl(cannedAcl);
|
||||
}
|
||||
if( storageEncryption != null ) {
|
||||
reqBuilder.serverSideEncryption(storageEncryption);
|
||||
}
|
||||
if( kmsKeyId !=null ) {
|
||||
reqBuilder.ssekmsKeyId(kmsKeyId);
|
||||
}
|
||||
if( contentType!=null ) {
|
||||
reqBuilder.metadataDirective(MetadataDirective.REPLACE);
|
||||
reqBuilder.contentType(contentType);
|
||||
}
|
||||
if( storageClass!=null ) {
|
||||
reqBuilder.storageClass(storageClass);
|
||||
}
|
||||
CopyObjectRequest req = reqBuilder.build();
|
||||
if( log.isTraceEnabled() ) {
|
||||
log.trace("S3 CopyObject request {}", req);
|
||||
}
|
||||
CopyRequest copyRequest = CopyRequest.builder().copyObjectRequest(req).build();
|
||||
try {
|
||||
transferManager().copy(copyRequest).completionFuture().get();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new InterruptedIOException(String.format("S3 copy s3://%s/%s to s3://%s/%s interrupted", req.sourceBucket(), req.sourceKey(), req.destinationBucket(), req.destinationKey()));
|
||||
} catch (ExecutionException e) {
|
||||
String msg = String.format("Exception thrown copying S3 object form s3://%s/%s to s3://%s/%s", req.sourceBucket(), req.sourceKey(), req.destinationBucket(), req.destinationKey());
|
||||
throw new IOException(msg, e.getCause());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class OngoingFileDownload {
|
||||
String bucket;
|
||||
String key;
|
||||
FileDownload download;
|
||||
|
||||
public OngoingFileDownload(String bucket, String key, FileDownload download) {
|
||||
this.bucket = bucket;
|
||||
this.key = key;
|
||||
this.download = download;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
/**
 * Immutable {@link BasicFileAttributes} implementation holding the values given
 * at construction time.
 *
 * Only one timestamp is stored: it is returned as last-modified, last-access and
 * creation time. The object key doubles as the file key. Symbolic links and
 * "other" file kinds are never reported.
 */
public class S3FileAttributes implements BasicFileAttributes {

    private final String key;
    private final FileTime lastModifiedTime;
    private final long size;
    private final boolean directory;
    private final boolean regularFile;

    public S3FileAttributes(String key, FileTime lastModifiedTime, long size,
                            boolean isDirectory, boolean isRegularFile) {
        this.key = key;
        this.lastModifiedTime = lastModifiedTime;
        this.size = size;
        this.directory = isDirectory;
        this.regularFile = isRegularFile;
    }

    // -- identity and size

    @Override
    public Object fileKey() {
        return key;
    }

    @Override
    public long size() {
        return size;
    }

    // -- timestamps: all three report the same stored value

    @Override
    public FileTime lastModifiedTime() {
        return lastModifiedTime;
    }

    @Override
    public FileTime lastAccessTime() {
        return lastModifiedTime;
    }

    @Override
    public FileTime creationTime() {
        return lastModifiedTime;
    }

    // -- file kind

    @Override
    public boolean isDirectory() {
        return directory;
    }

    @Override
    public boolean isRegularFile() {
        return regularFile;
    }

    @Override
    public boolean isSymbolicLink() {
        return false;
    }

    @Override
    public boolean isOther() {
        return false;
    }

    @Override
    public String toString() {
        final String template = "[%s: lastModified=%s, size=%s, isDirectory=%s, isRegularFile=%s]";
        return String.format(template, key, lastModifiedTime, size, directory, regularFile);
    }
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.attribute.BasicFileAttributeView;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
|
||||
/**
|
||||
* Implements {@link BasicFileAttributeView} for S3 file storage
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
public class S3FileAttributesView implements BasicFileAttributeView {
|
||||
|
||||
private S3FileAttributes target;
|
||||
|
||||
S3FileAttributesView(S3FileAttributes target) {
|
||||
this.target = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "basic";
|
||||
}
|
||||
|
||||
@Override
|
||||
public BasicFileAttributes readAttributes() throws IOException {
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
|
||||
* This API is implemented is not supported but instead of throwing an exception just do nothing
|
||||
* to not break the method {@code java.nio.file.CopyMoveHelper#copyToForeignTarget(java.nio.file.Path, java.nio.file.Path, java.nio.file.CopyOption...)}
|
||||
*
|
||||
* @param lastModifiedTime
|
||||
* @param lastAccessTime
|
||||
* @param createTime
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, FileTime createTime) throws IOException {
|
||||
// not supported
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.PathMatcher;
|
||||
import java.nio.file.WatchService;
|
||||
import java.nio.file.attribute.UserPrincipalLookupService;
|
||||
import java.nio.file.spi.FileSystemProvider;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.Bucket;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
public class S3FileSystem extends FileSystem {
|
||||
|
||||
private final S3FileSystemProvider provider;
|
||||
private final S3Client client;
|
||||
private final String endpoint;
|
||||
private final String bucketName;
|
||||
|
||||
private final Properties properties;
|
||||
|
||||
public S3FileSystem(S3FileSystemProvider provider, S3Client client, URI uri, Properties props) {
|
||||
this.provider = provider;
|
||||
this.client = client;
|
||||
this.endpoint = uri.getHost();
|
||||
this.bucketName = S3Path.bucketName(uri);
|
||||
this.properties = props;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileSystemProvider provider() {
|
||||
return provider;
|
||||
}
|
||||
|
||||
public Properties properties() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
this.provider.fileSystems.remove(bucketName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpen() {
|
||||
return this.provider.fileSystems.containsKey(bucketName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReadOnly() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSeparator() {
|
||||
return S3Path.PATH_SEPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<Path> getRootDirectories() {
|
||||
ImmutableList.Builder<Path> builder = ImmutableList.builder();
|
||||
try {
|
||||
for (Bucket bucket : client.listBuckets()) {
|
||||
builder.add(new S3Path(this, bucket.name()));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<FileStore> getFileStores() {
|
||||
return ImmutableList.of();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> supportedFileAttributeViews() {
|
||||
return ImmutableSet.of("basic");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getPath(String first, String... more) {
|
||||
if (more.length == 0) {
|
||||
return new S3Path(this, first);
|
||||
}
|
||||
|
||||
return new S3Path(this, first, more);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PathMatcher getPathMatcher(String syntaxAndPattern) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public UserPrincipalLookupService getUserPrincipalLookupService() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public WatchService newWatchService() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public S3Client getClient() {
|
||||
return client;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the endpoint associated with this fileSystem.
|
||||
*
|
||||
* @see <a href="http://docs.aws.amazon.com/general/latest/gr/rande.html">http://docs.aws.amazon.com/general/latest/gr/rande.html</a>
|
||||
* @return string
|
||||
*/
|
||||
public String getEndpoint() {
|
||||
return endpoint;
|
||||
}
|
||||
|
||||
public String getBucketName() {
|
||||
return bucketName;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,857 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.SeekableByteChannel;
|
||||
import java.nio.file.AccessDeniedException;
|
||||
import java.nio.file.AccessMode;
|
||||
import java.nio.file.CopyOption;
|
||||
import java.nio.file.DirectoryNotEmptyException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.FileAlreadyExistsException;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.FileSystemAlreadyExistsException;
|
||||
import java.nio.file.FileSystemNotFoundException;
|
||||
import java.nio.file.FileSystems;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.LinkOption;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.nio.file.OpenOption;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.nio.file.attribute.BasicFileAttributeView;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.nio.file.attribute.FileAttribute;
|
||||
import java.nio.file.attribute.FileAttributeView;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
import java.nio.file.spi.FileSystemProvider;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import software.amazon.awssdk.core.ResponseInputStream;
|
||||
import software.amazon.awssdk.services.s3.model.*;
|
||||
import software.amazon.awssdk.services.s3.model.S3Object;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Sets;
|
||||
import nextflow.cloud.aws.AwsClientFactory;
|
||||
import nextflow.cloud.aws.config.AwsConfig;
|
||||
import nextflow.cloud.aws.nio.util.IOUtils;
|
||||
import nextflow.cloud.aws.nio.util.S3MultipartOptions;
|
||||
import nextflow.cloud.aws.nio.util.S3ObjectId;
|
||||
import nextflow.cloud.aws.nio.util.S3ObjectSummaryLookup;
|
||||
import nextflow.extension.FilesEx;
|
||||
import nextflow.file.CopyOptions;
|
||||
import nextflow.file.FileHelper;
|
||||
import nextflow.file.FileSystemTransferAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static com.google.common.collect.Sets.difference;
|
||||
import static java.lang.String.format;
|
||||
|
||||
/**
|
||||
* Spec:
|
||||
*
|
||||
* URI: s3://[endpoint]/{bucket}/{key} If endpoint is missing, it's assumed to
|
||||
* be the default S3 endpoint (s3.amazonaws.com)
|
||||
*
|
||||
* FileSystem roots: /{bucket}/
|
||||
*
|
||||
* Treatment of S3 objects: - If a key ends in "/" it's considered a directory
|
||||
* *and* a regular file. Otherwise, it's just a regular file. - It is legal for
|
||||
* a key "xyz" and "xyz/" to exist at the same time. The latter is treated as a
|
||||
* directory. - If a file "a/b/c" exists but there's no "a" or "a/b/", these are
|
||||
* considered "implicit" directories. They can be listed, traversed and deleted.
|
||||
*
|
||||
* Deviations from FileSystem provider API: - Deleting a file or directory
|
||||
* always succeeds, regardless of whether the file/directory existed before the
|
||||
* operation was issued i.e. Files.delete() and Files.deleteIfExists() are
|
||||
* equivalent.
|
||||
*
|
||||
*
|
||||
* Future versions of this provider might allow for a strict mode that mimics
|
||||
* the semantics of the FileSystem provider API on a best effort basis, at an
|
||||
* increased processing cost.
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class S3FileSystemProvider extends FileSystemProvider implements FileSystemTransferAware {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(S3FileSystemProvider.class);
|
||||
|
||||
final Map<String, S3FileSystem> fileSystems = new HashMap<>();
|
||||
|
||||
private final S3ObjectSummaryLookup s3ObjectSummaryLookup = new S3ObjectSummaryLookup();
|
||||
|
||||
@Override
|
||||
public String getScheme() {
|
||||
return "s3";
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileSystem newFileSystem(URI uri, Map<String, ?> env) throws IOException {
|
||||
Preconditions.checkNotNull(uri, "uri is null");
|
||||
Preconditions.checkArgument(uri.getScheme().equals("s3"), "uri scheme must be 's3': '%s'", uri);
|
||||
|
||||
final String bucketName = S3Path.bucketName(uri);
|
||||
synchronized (fileSystems) {
|
||||
if( fileSystems.containsKey(bucketName))
|
||||
throw new FileSystemAlreadyExistsException("S3 filesystem already exists. Use getFileSystem() instead");
|
||||
|
||||
final AwsConfig awsConfig = new AwsConfig(env);
|
||||
//
|
||||
final S3FileSystem result = createFileSystem(uri, awsConfig);
|
||||
fileSystems.put(bucketName, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileSystem getFileSystem(URI uri) {
|
||||
final String bucketName = S3Path.bucketName(uri);
|
||||
final FileSystem fileSystem = this.fileSystems.get(bucketName);
|
||||
|
||||
if (fileSystem == null) {
|
||||
throw new FileSystemNotFoundException("S3 filesystem not yet created. Use newFileSystem() instead");
|
||||
}
|
||||
|
||||
return fileSystem;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deviation from spec: throws FileSystemNotFoundException if FileSystem
|
||||
* hasn't yet been initialized. Call newFileSystem() first.
|
||||
* Need credentials. Maybe set credentials after? how?
|
||||
*/
|
||||
@Override
|
||||
public Path getPath(URI uri) {
|
||||
Preconditions.checkArgument(uri.getScheme().equals(getScheme()),"URI scheme must be %s", getScheme());
|
||||
return getFileSystem(uri).getPath(uri.getPath());
|
||||
}
|
||||
|
||||
@Override
|
||||
public DirectoryStream<Path> newDirectoryStream(Path dir, DirectoryStream.Filter<? super Path> filter) throws IOException {
|
||||
|
||||
Preconditions.checkArgument(dir instanceof S3Path,"path must be an instance of %s", S3Path.class.getName());
|
||||
final S3Path s3Path = (S3Path) dir;
|
||||
|
||||
return new DirectoryStream<Path>() {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
// nothing to do here
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Path> iterator() {
|
||||
return new S3Iterator(s3Path.getFileSystem(), s3Path.getBucket(), s3Path.getKey() + "/");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream newInputStream(Path path, OpenOption... options)
|
||||
throws IOException {
|
||||
Preconditions.checkArgument(options.length == 0,
|
||||
"OpenOptions not yet supported: %s",
|
||||
ImmutableList.copyOf(options)); // TODO
|
||||
|
||||
Preconditions.checkArgument(path instanceof S3Path,
|
||||
"path must be an instance of %s", S3Path.class.getName());
|
||||
S3Path s3Path = (S3Path) path;
|
||||
|
||||
Preconditions.checkArgument(!s3Path.getKey().equals(""),
|
||||
"cannot create InputStream for root directory: %s", FilesEx.toUriString(s3Path));
|
||||
|
||||
final ResponseInputStream<GetObjectResponse> result = s3Path
|
||||
.getFileSystem()
|
||||
.getClient()
|
||||
.getObject(s3Path.getBucket(), s3Path.getKey());
|
||||
|
||||
if (result == null)
|
||||
throw new IOException(String.format("The specified path is a directory: %s", FilesEx.toUriString(s3Path)));
|
||||
|
||||
// Wrap the response stream so that close() aborts the underlying HTTP connection
|
||||
// instead of draining the remaining bytes. Apache HTTP client's ContentLengthInputStream.close()
|
||||
// reads to end-of-stream to release the connection back to the pool, which for a large S3
|
||||
// object (e.g. a multi-GB FASTQ) can block the caller for many minutes. Callers of
|
||||
// newInputStream() typically do not consume the whole object, so abort() is the correct
|
||||
// semantics here.
|
||||
return new FilterInputStream(result) {
|
||||
@Override
|
||||
public void close() {
|
||||
result.abort();
|
||||
}
|
||||
// Just-used for testing
|
||||
void abort() {
|
||||
result.abort();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public OutputStream newOutputStream(final Path path, final OpenOption... options) throws IOException {
|
||||
Preconditions.checkArgument(path instanceof S3Path, "path must be an instance of %s", S3Path.class.getName());
|
||||
S3Path s3Path = (S3Path)path;
|
||||
|
||||
// validate options
|
||||
if (options.length > 0) {
|
||||
Set<OpenOption> opts = new LinkedHashSet<>(Arrays.asList(options));
|
||||
|
||||
// cannot handle APPEND here -> use newByteChannel() implementation
|
||||
if (opts.contains(StandardOpenOption.APPEND)) {
|
||||
return super.newOutputStream(path, options);
|
||||
}
|
||||
|
||||
if (opts.contains(StandardOpenOption.READ)) {
|
||||
throw new IllegalArgumentException("READ not allowed");
|
||||
}
|
||||
|
||||
boolean create = opts.remove(StandardOpenOption.CREATE);
|
||||
boolean createNew = opts.remove(StandardOpenOption.CREATE_NEW);
|
||||
boolean truncateExisting = opts.remove(StandardOpenOption.TRUNCATE_EXISTING);
|
||||
|
||||
// remove irrelevant/ignored options
|
||||
opts.remove(StandardOpenOption.WRITE);
|
||||
opts.remove(StandardOpenOption.SPARSE);
|
||||
|
||||
if (!opts.isEmpty()) {
|
||||
throw new UnsupportedOperationException(opts.iterator().next() + " not supported");
|
||||
}
|
||||
|
||||
if (!(create && truncateExisting)) {
|
||||
if (exists(s3Path)) {
|
||||
if (createNew || !truncateExisting) {
|
||||
throw new FileAlreadyExistsException(FilesEx.toUriString(s3Path));
|
||||
}
|
||||
} else {
|
||||
if (!createNew && !create) {
|
||||
throw new NoSuchFileException(FilesEx.toUriString(s3Path));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return createUploaderOutputStream(s3Path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canUpload(Path source, Path target) {
|
||||
return FileSystems.getDefault().equals(source.getFileSystem()) && target instanceof S3Path;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canDownload(Path source, Path target) {
|
||||
return source instanceof S3Path && FileSystems.getDefault().equals(target.getFileSystem());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void download(Path remoteFile, Path localDestination, CopyOption... options) throws IOException {
|
||||
final S3Path source = (S3Path)remoteFile;
|
||||
|
||||
final CopyOptions opts = CopyOptions.parse(options);
|
||||
// delete target if it exists and REPLACE_EXISTING is specified
|
||||
if (opts.replaceExisting()) {
|
||||
FileHelper.deletePath(localDestination);
|
||||
}
|
||||
else if (Files.exists(localDestination))
|
||||
throw new FileAlreadyExistsException(localDestination.toString());
|
||||
|
||||
// Read S3 file attributes (metadata) for the source path, returns Optional.empty() if file doesn't exist
|
||||
final Optional<S3FileAttributes> attrs = readAttr1(source);
|
||||
// Extract directory status from attributes, defaulting to false if no attributes found
|
||||
final boolean isDir = attrs.map(S3FileAttributes::isDirectory).orElse(false);
|
||||
// Get file size only for non-directories (directories have size 0), defaulting to 0L if no attributes
|
||||
final long size = attrs.filter(a -> !a.isDirectory()).map(S3FileAttributes::size).orElse(0L);
|
||||
final String type = isDir ? "directory": "file";
|
||||
final S3Client s3Client = source.getFileSystem().getClient();
|
||||
log.debug("S3 download {} from={} to={} size={}", type, FilesEx.toUriString(source), localDestination, size);
|
||||
if( isDir ) {
|
||||
s3Client.downloadDirectory(source, localDestination.toFile());
|
||||
}
|
||||
else if( size > 0 ) {
|
||||
s3Client.downloadFile(source, localDestination.toFile(), size);
|
||||
}
|
||||
else {
|
||||
Files.deleteIfExists(localDestination);
|
||||
Files.createFile(localDestination);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void upload(Path localFile, Path remoteDestination, CopyOption... options) throws IOException {
|
||||
final S3Path target = (S3Path) remoteDestination;
|
||||
|
||||
CopyOptions opts = CopyOptions.parse(options);
|
||||
LinkOption[] linkOptions = (opts.followLinks()) ? new LinkOption[0] : new LinkOption[] { LinkOption.NOFOLLOW_LINKS };
|
||||
|
||||
// attributes of source file
|
||||
if (Files.readAttributes(localFile, BasicFileAttributes.class, linkOptions).isSymbolicLink())
|
||||
throw new IOException("Uploading of symbolic links not supported - offending path: " + localFile);
|
||||
|
||||
final Optional<S3FileAttributes> attrs = readAttr1(target);
|
||||
final boolean exits = attrs.isPresent();
|
||||
|
||||
// delete target if it exists and REPLACE_EXISTING is specified
|
||||
if (opts.replaceExisting()) {
|
||||
FileHelper.deletePath(target);
|
||||
}
|
||||
else if ( exits )
|
||||
throw new FileAlreadyExistsException(target.toString());
|
||||
|
||||
final boolean isDir = Files.isDirectory(localFile);
|
||||
final String type = isDir ? "directory": "file";
|
||||
log.debug("S3 upload {} from={} to={}", type, localFile, FilesEx.toUriString(target));
|
||||
final S3Client s3Client = target.getFileSystem().getClient();
|
||||
if( isDir ) {
|
||||
s3Client.uploadDirectory(localFile.toFile(), target);
|
||||
}
|
||||
else {
|
||||
s3Client.uploadFile(localFile.toFile(), target);
|
||||
}
|
||||
}
|
||||
|
||||
private S3OutputStream createUploaderOutputStream( S3Path fileToUpload ) {
|
||||
S3Client s3 = fileToUpload.getFileSystem().getClient();
|
||||
Properties props = fileToUpload.getFileSystem().properties();
|
||||
|
||||
final String storageClass = fileToUpload.getStorageClass()!=null ? fileToUpload.getStorageClass() : props.getProperty("upload_storage_class");
|
||||
final S3MultipartOptions opts = props != null ? new S3MultipartOptions(props) : new S3MultipartOptions();
|
||||
final S3ObjectId objectId = fileToUpload.toS3ObjectId();
|
||||
S3OutputStream stream = new S3OutputStream(s3.getClient(), objectId, opts)
|
||||
.setCannedAcl(s3.getCannedAcl())
|
||||
.setStorageClass(storageClass)
|
||||
.setStorageEncryption(props.getProperty("storage_encryption"))
|
||||
.setKmsKeyId(props.getProperty("storage_kms_key_id"))
|
||||
.setContentType(fileToUpload.getContentType())
|
||||
.setTags(fileToUpload.getTagsList());
|
||||
return stream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekableByteChannel newByteChannel(Path path,
|
||||
Set<? extends OpenOption> options, FileAttribute<?>... attrs)
|
||||
throws IOException {
|
||||
Preconditions.checkArgument(path instanceof S3Path,
|
||||
"path must be an instance of %s", S3Path.class.getName());
|
||||
final S3Path s3Path = (S3Path) path;
|
||||
// we resolve to a file inside the temp folder with the s3path name
|
||||
final Path tempFile = createTempDir().resolve(path.getFileName().toString());
|
||||
|
||||
try {
|
||||
InputStream is = s3Path.getFileSystem().getClient()
|
||||
.getObject(s3Path.getBucket(), s3Path.getKey());
|
||||
|
||||
if (is == null)
|
||||
throw new IOException(String.format("The specified path is a directory: %s", path));
|
||||
|
||||
Files.write(tempFile, IOUtils.toByteArray(is));
|
||||
}
|
||||
catch (NoSuchFileException e) {
|
||||
// When opening for CREATE/CREATE_NEW the remote object is allowed to not exist yet
|
||||
// — the temp file will be created and uploaded on close. For any other open mode
|
||||
// propagate the original exception so the caller sees the real s3:// path.
|
||||
if (!options.contains(StandardOpenOption.CREATE) && !options.contains(StandardOpenOption.CREATE_NEW)) {
|
||||
throw e;
|
||||
}
|
||||
log.trace("S3 object does not exist yet, will be created on close: {}", FilesEx.toUriString(s3Path));
|
||||
}
|
||||
|
||||
// and we can use the File SeekableByteChannel implementation
|
||||
final SeekableByteChannel seekable = Files .newByteChannel(tempFile, options);
|
||||
final List<Tag> tags = ((S3Path) path).getTagsList();
|
||||
final String contentType = ((S3Path) path).getContentType();
|
||||
|
||||
return new SeekableByteChannel() {
|
||||
@Override
|
||||
public boolean isOpen() {
|
||||
return seekable.isOpen();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
|
||||
if (!seekable.isOpen()) {
|
||||
return;
|
||||
}
|
||||
seekable.close();
|
||||
// upload the content where the seekable ends (close)
|
||||
if (Files.exists(tempFile)) {
|
||||
try (InputStream stream = Files.newInputStream(tempFile)) {
|
||||
/*
|
||||
FIXME: if the stream is {@link InputStream#markSupported()} i can reuse the same stream
|
||||
and evict the close and open methods of probeContentType. By this way:
|
||||
metadata.setContentType(new Tika().detect(stream, tempFile.getFileName().toString()));
|
||||
*/
|
||||
s3Path.getFileSystem()
|
||||
.getClient()
|
||||
.putObject(s3Path.getBucket(), s3Path.getKey(), stream, tags, contentType, Files.size(tempFile));
|
||||
}
|
||||
}
|
||||
else {
|
||||
// delete: check option delete_on_close
|
||||
s3Path.getFileSystem().
|
||||
getClient().deleteObject(s3Path.getBucket(), s3Path.getKey());
|
||||
}
|
||||
// and delete the temp dir
|
||||
Files.deleteIfExists(tempFile);
|
||||
Files.deleteIfExists(tempFile.getParent());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int write(ByteBuffer src) throws IOException {
|
||||
return seekable.write(src);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekableByteChannel truncate(long size) throws IOException {
|
||||
return seekable.truncate(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() throws IOException {
|
||||
return seekable.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(ByteBuffer dst) throws IOException {
|
||||
return seekable.read(dst);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekableByteChannel position(long newPosition)
|
||||
throws IOException {
|
||||
return seekable.position(newPosition);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return seekable.position();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Deviations from spec: Does not perform atomic check-and-create. Since a
|
||||
* directory is just an S3 object, all directories in the hierarchy are
|
||||
* created or it already existed.
|
||||
*/
|
||||
@Override
|
||||
public void createDirectory(Path dir, FileAttribute<?>... attrs)
|
||||
throws IOException {
|
||||
|
||||
// FIXME: throw exception if the same key already exists at amazon s3
|
||||
|
||||
S3Path s3Path = (S3Path) dir;
|
||||
|
||||
Preconditions.checkArgument(attrs.length == 0,
|
||||
"attrs not yet supported: %s", ImmutableList.copyOf(attrs)); // TODO
|
||||
|
||||
// Creating a bucket is not supported
|
||||
if (s3Path.getKey().isEmpty()) {
|
||||
throw new UnsupportedOperationException("Creating a bucket is not supported");
|
||||
}
|
||||
|
||||
List<Tag> tags = s3Path.getTagsList();
|
||||
|
||||
String keyName = s3Path.getKey()
|
||||
+ (s3Path.getKey().endsWith("/") ? "" : "/");
|
||||
|
||||
s3Path.getFileSystem()
|
||||
.getClient()
|
||||
.putObject(s3Path.getBucket(), keyName, new ByteArrayInputStream(new byte[0]), tags, null, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void delete(Path path) throws IOException {
|
||||
Preconditions.checkArgument(path instanceof S3Path,
|
||||
"path must be an instance of %s", S3Path.class.getName());
|
||||
|
||||
S3Path s3Path = (S3Path) path;
|
||||
|
||||
if (Files.notExists(path)) {
|
||||
throw new NoSuchFileException("the path: " + FilesEx.toUriString(s3Path) + " does not exist");
|
||||
}
|
||||
|
||||
// Deleting a bucket is not supported
|
||||
if (s3Path.getKey().isEmpty()) {
|
||||
throw new UnsupportedOperationException("Deleting a bucket is not supported");
|
||||
}
|
||||
|
||||
// NOTE: S3 directories are virtual (marker objects or implied key prefixes),
|
||||
// so we do not check for emptiness before deleting. Enforcing POSIX-like
|
||||
// DirectoryNotEmptyException semantics on S3 is unreliable due to eventual
|
||||
// consistency and unnecessary because deleting a directory marker does not
|
||||
// affect its children.
|
||||
|
||||
// we delete the two objects (sometimes exists the key '/' and sometimes not)
|
||||
s3Path.getFileSystem().getClient()
|
||||
.deleteObject(s3Path.getBucket(), s3Path.getKey());
|
||||
s3Path.getFileSystem().getClient()
|
||||
.deleteObject(s3Path.getBucket(), s3Path.getKey() + "/");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(Path source, Path target, CopyOption... options)
|
||||
throws IOException {
|
||||
Preconditions.checkArgument(source instanceof S3Path,
|
||||
"source must be an instance of %s", S3Path.class.getName());
|
||||
Preconditions.checkArgument(target instanceof S3Path,
|
||||
"target must be an instance of %s", S3Path.class.getName());
|
||||
|
||||
if (isSameFile(source, target)) {
|
||||
return;
|
||||
}
|
||||
|
||||
S3Path s3Source = (S3Path) source;
|
||||
S3Path s3Target = (S3Path) target;
|
||||
/*
|
||||
* Preconditions.checkArgument(!s3Source.isDirectory(),
|
||||
* "copying directories is not yet supported: %s", source); // TODO
|
||||
* Preconditions.checkArgument(!s3Target.isDirectory(),
|
||||
* "copying directories is not yet supported: %s", target); // TODO
|
||||
*/
|
||||
ImmutableSet<CopyOption> actualOptions = ImmutableSet.copyOf(options);
|
||||
verifySupportedOptions(EnumSet.of(StandardCopyOption.REPLACE_EXISTING),
|
||||
actualOptions);
|
||||
|
||||
if (!actualOptions.contains(StandardCopyOption.REPLACE_EXISTING)) {
|
||||
if (exists(s3Target)) {
|
||||
throw new FileAlreadyExistsException(format(
|
||||
"target already exists: %s", FilesEx.toUriString(s3Target)));
|
||||
}
|
||||
}
|
||||
|
||||
S3Client client = s3Source.getFileSystem() .getClient();
|
||||
final List<Tag> tags = ((S3Path) target).getTagsList();
|
||||
final String contentType = ((S3Path) target).getContentType();
|
||||
final String storageClass = ((S3Path) target).getStorageClass();
|
||||
|
||||
//TransferManager alternative
|
||||
CopyObjectRequest.Builder reqBuilder = CopyObjectRequest.builder()
|
||||
.sourceBucket(s3Source.getBucket())
|
||||
.sourceKey(s3Source.getKey())
|
||||
.destinationBucket(s3Target.getBucket())
|
||||
.destinationKey(s3Target.getKey());
|
||||
log.trace("Copy file via copy object - source: source={}, target={}, tags={}, storageClass={}", s3Source, s3Target, tags, storageClass);
|
||||
client.copyFile(reqBuilder, tags, contentType, storageClass);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void move(Path source, Path target, CopyOption... options) throws IOException {
|
||||
for( CopyOption it : options ) {
|
||||
if( it==StandardCopyOption.ATOMIC_MOVE )
|
||||
throw new IllegalArgumentException("Atomic move not supported by S3 file system provider");
|
||||
}
|
||||
copy(source,target,options);
|
||||
delete(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSameFile(Path path1, Path path2) throws IOException {
|
||||
return path1.isAbsolute() && path2.isAbsolute() && path1.equals(path2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHidden(Path path) throws IOException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileStore getFileStore(Path path) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkAccess(Path path, AccessMode... modes) throws IOException {
|
||||
S3Path s3Path = (S3Path) path;
|
||||
Preconditions.checkArgument(s3Path.isAbsolute(),
|
||||
"path must be absolute: %s", s3Path);
|
||||
|
||||
S3Client client = s3Path.getFileSystem().getClient();
|
||||
|
||||
if( modes==null || modes.length==0 ) {
|
||||
// when no modes are given, the method is invoked
|
||||
// by `Files.exists` method, therefore just use summary lookup
|
||||
s3ObjectSummaryLookup.lookup((S3Path)path);
|
||||
return;
|
||||
}
|
||||
|
||||
// get ACL and check if the file exists as a side-effect
|
||||
AccessControlPolicy acl = getAccessControl(s3Path);
|
||||
String caller = client.getCallerAccount();
|
||||
for (AccessMode accessMode : modes) {
|
||||
switch (accessMode) {
|
||||
case EXECUTE:
|
||||
throw new AccessDeniedException(s3Path.toString(), null,
|
||||
"file is not executable");
|
||||
case READ:
|
||||
if (caller == null) {
|
||||
//if we cannot get the user's canonical ID, try read the object;
|
||||
s3ObjectSummaryLookup.lookup((S3Path) path);
|
||||
}
|
||||
else if (!hasPermissions(acl, caller,
|
||||
EnumSet.of(Permission.FULL_CONTROL, Permission.READ))) {
|
||||
throw new AccessDeniedException(s3Path.toString(), null,
|
||||
"file is not readable");
|
||||
}
|
||||
break;
|
||||
case WRITE:
|
||||
if (caller == null) {
|
||||
log.warn("User's Canonical Id cannot be retrieved. We can not check the access.");
|
||||
}
|
||||
else if (!hasPermissions(acl, caller,
|
||||
EnumSet.of(Permission.FULL_CONTROL, Permission.WRITE))) {
|
||||
throw new AccessDeniedException(s3Path.toString(), null,
|
||||
format("bucket '%s' is not writable",
|
||||
s3Path.getBucket()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check if the param acl has the same owner than the parameter owner and
|
||||
* have almost one of the permission set in the parameter permissions
|
||||
* @param acl
|
||||
* @param owner
|
||||
* @param permissions almost one
|
||||
* @return
|
||||
*/
|
||||
private boolean hasPermissions(AccessControlPolicy acl, String owner,
|
||||
EnumSet<Permission> permissions) {
|
||||
boolean result = false;
|
||||
for (Grant grant : acl.grants()) {
|
||||
if (grant.grantee().id().equals(owner)
|
||||
&& permissions.contains(grant.permission())) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <V extends FileAttributeView> V getFileAttributeView(Path path, Class<V> type, LinkOption... options) {
|
||||
Preconditions.checkArgument(path instanceof S3Path,
|
||||
"path must be an instance of %s", S3Path.class.getName());
|
||||
S3Path s3Path = (S3Path) path;
|
||||
if (type.isAssignableFrom(BasicFileAttributeView.class)) {
|
||||
try {
|
||||
return (V) new S3FileAttributesView(readAttr0(s3Path));
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Unable read attributes for file: " + FilesEx.toUriString(s3Path), e);
|
||||
}
|
||||
}
|
||||
log.trace("Unsupported S3 file system provider file attribute view: " + type.getName());
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public <A extends BasicFileAttributes> A readAttributes(Path path, Class<A> type, LinkOption... options) throws IOException {
|
||||
Preconditions.checkArgument(path instanceof S3Path,
|
||||
"path must be an instance of %s", S3Path.class.getName());
|
||||
S3Path s3Path = (S3Path) path;
|
||||
if (type.isAssignableFrom(BasicFileAttributes.class)) {
|
||||
return (A) ("".equals(s3Path.getKey())
|
||||
// the root bucket is implicitly a directory
|
||||
? new S3FileAttributes("/", null, 0, true, false)
|
||||
// read the target path attributes
|
||||
: readAttr0(s3Path));
|
||||
}
|
||||
// not support attribute class
|
||||
throw new UnsupportedOperationException(format("only %s supported", BasicFileAttributes.class));
|
||||
}
|
||||
|
||||
private Optional<S3FileAttributes> readAttr1(S3Path s3Path) throws IOException {
|
||||
try {
|
||||
return Optional.of(readAttr0(s3Path));
|
||||
}
|
||||
catch (NoSuchFileException e) {
|
||||
return Optional.<S3FileAttributes>empty();
|
||||
}
|
||||
}
|
||||
|
||||
private S3FileAttributes readAttr0(S3Path s3Path) throws IOException {
|
||||
S3Object objectSummary = s3ObjectSummaryLookup.lookup(s3Path);
|
||||
|
||||
// parse the data to BasicFileAttributes.
|
||||
FileTime lastModifiedTime = null;
|
||||
if( objectSummary.lastModified() != null ) {
|
||||
lastModifiedTime = FileTime.from(objectSummary.lastModified().toEpochMilli(), TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
long size = objectSummary.size();
|
||||
boolean directory = false;
|
||||
boolean regularFile = false;
|
||||
String key = objectSummary.key();
|
||||
// check if is a directory and the key of this directory exists in amazon s3
|
||||
if (objectSummary.key().equals(s3Path.getKey() + "/") && objectSummary.key().endsWith("/")) {
|
||||
directory = true;
|
||||
}
|
||||
// is a directory but does not exist in amazon s3
|
||||
else if ((!objectSummary.key().equals(s3Path.getKey()) || "".equals(s3Path.getKey())) && objectSummary.key().startsWith(s3Path.getKey())) {
|
||||
directory = true;
|
||||
// no metadata, we fake one
|
||||
size = 0;
|
||||
// delete extra part
|
||||
key = s3Path.getKey() + "/";
|
||||
}
|
||||
// is a file:
|
||||
else {
|
||||
regularFile = true;
|
||||
}
|
||||
|
||||
return new S3FileAttributes(key, lastModifiedTime, size, directory, regularFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> readAttributes(Path path, String attributes, LinkOption... options) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setAttribute(Path path, String attribute, Object value,
|
||||
LinkOption... options) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
protected S3FileSystem createFileSystem(URI uri, AwsConfig awsConfig) {
|
||||
// try to load amazon props
|
||||
Properties props = loadAmazonProperties();
|
||||
// add properties for legacy compatibility
|
||||
props.putAll(awsConfig.getS3LegacyProperties());
|
||||
|
||||
final String bucketName = S3Path.bucketName(uri);
|
||||
// do not use `global` flag for custom endpoint because
|
||||
// when enabling that flag, it overrides S3 endpoints with AWS global endpoint
|
||||
// see https://github.com/nextflow-io/nextflow/pull/5779
|
||||
final boolean global = bucketName!=null && !awsConfig.getS3Config().isCustomEndpoint();
|
||||
final AwsClientFactory factory = new AwsClientFactory(awsConfig, awsConfig.resolveS3Region());
|
||||
final S3Client client = new S3Client(factory, props, global);
|
||||
|
||||
// set the client acl
|
||||
client.setCannedAcl(getProp(props, "s_3_acl", "s3_acl", "s3acl", "s3Acl"));
|
||||
client.setStorageEncryption(props.getProperty("storage_encryption"));
|
||||
client.setKmsKeyId(props.getProperty("storage_kms_key_id"));
|
||||
client.setRequesterPaysEnabled(props.getProperty("requester_pays"));
|
||||
|
||||
if( props.getProperty("glacier_auto_retrieval") != null )
|
||||
log.warn("Glacier auto-retrieval is no longer supported, config option `aws.client.glacierAutoRetrieval` will be ignored");
|
||||
|
||||
return new S3FileSystem(this, client, uri, props);
|
||||
}
|
||||
|
||||
protected String getProp(Properties props, String... keys) {
|
||||
for( String k : keys ) {
|
||||
if( props.containsKey(k) ) {
|
||||
return props.getProperty(k);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* find /amazon.properties in the classpath
|
||||
* @return Properties amazon.properties
|
||||
*/
|
||||
protected Properties loadAmazonProperties() {
|
||||
Properties props = new Properties();
|
||||
// http://www.javaworld.com/javaworld/javaqa/2003-06/01-qa-0606-load.html
|
||||
// http://www.javaworld.com/javaqa/2003-08/01-qa-0808-property.html
|
||||
try(InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream("amazon.properties")) {
|
||||
if (in != null) {
|
||||
props.load(in);
|
||||
}
|
||||
|
||||
} catch (IOException e) {}
|
||||
|
||||
return props;
|
||||
}
|
||||
|
||||
// ~~~
|
||||
|
||||
private <T> void verifySupportedOptions(Set<? extends T> allowedOptions,
|
||||
Set<? extends T> actualOptions) {
|
||||
Sets.SetView<? extends T> unsupported = difference(actualOptions,
|
||||
allowedOptions);
|
||||
Preconditions.checkArgument(unsupported.isEmpty(),
|
||||
"the following options are not supported: %s", unsupported);
|
||||
}
|
||||
/**
|
||||
* check that the paths exists or not
|
||||
* @param path S3Path
|
||||
* @return true if exists
|
||||
*/
|
||||
private boolean exists(S3Path path) {
|
||||
try {
|
||||
s3ObjectSummaryLookup.lookup(path);
|
||||
return true;
|
||||
}
|
||||
catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Control List, if the path does not exist
|
||||
* (because the path is a directory and this key isn't created at amazon s3)
|
||||
* then return the ACL of the first child.
|
||||
*
|
||||
* @param path {@link S3Path}
|
||||
* @return AccessControlList
|
||||
* @throws IOException if error getting access control
|
||||
*/
|
||||
private AccessControlPolicy getAccessControl(S3Path path) throws IOException {
|
||||
String key = path.getKey();
|
||||
if (key == null || key.isEmpty())
|
||||
return path.getFileSystem().getClient().getBucketAcl(path.getBucket());
|
||||
return path.getFileSystem().getClient().getObjectAcl(path.getBucket(), key);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a temporal directory to create streams
|
||||
* @return Path temporal folder
|
||||
* @throws IOException
|
||||
*/
|
||||
protected Path createTempDir() throws IOException {
|
||||
return Files.createTempDirectory("temp-s3-");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.*;
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* S3 iterator over folders at first level.
|
||||
* Future versions of this class should be return the elements
|
||||
* in a incremental way when the #next() method is called.
|
||||
*/
|
||||
public class S3Iterator implements Iterator<Path> {
|
||||
|
||||
private S3FileSystem s3FileSystem;
|
||||
private String bucket;
|
||||
private String key;
|
||||
|
||||
private Iterator<S3Path> it;
|
||||
|
||||
public S3Iterator(S3FileSystem s3FileSystem, String bucket, String key) {
|
||||
|
||||
Preconditions.checkArgument(key != null && key.endsWith("/"), "key %s should be ended with slash '/'", key);
|
||||
|
||||
this.bucket = bucket;
|
||||
// the only case i dont need the end slash is to list buckets content
|
||||
this.key = key.length() == 1 ? "" : key;
|
||||
this.s3FileSystem = s3FileSystem;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public S3Path next() {
|
||||
return getIterator().next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return getIterator().hasNext();
|
||||
}
|
||||
|
||||
private Iterator<S3Path> getIterator() {
|
||||
if (it == null) {
|
||||
ListObjectsV2Request request = buildRequest();
|
||||
|
||||
S3Client s3Client = s3FileSystem.getClient();
|
||||
|
||||
// This automatically handles pagination
|
||||
try {
|
||||
it = s3Client.listObjectsV2Paginator(request).stream().flatMap(r -> parseObjectListing(r).stream()).iterator();
|
||||
}catch( IOException e ){
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
return it;
|
||||
}
|
||||
|
||||
private ListObjectsV2Request buildRequest(){
|
||||
|
||||
return ListObjectsV2Request.builder()
|
||||
.bucket(bucket)
|
||||
.prefix(key)
|
||||
.delimiter("/")
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* add to the listPath the elements at the same level that s3Path
|
||||
* @param current ListObjectsResponseto walk
|
||||
*/
|
||||
private List<S3Path> parseObjectListing( ListObjectsV2Response current) {
|
||||
List<S3Path> listPath = new ArrayList<>();
|
||||
// add all the objects i.e. the files, except iterator key.
|
||||
// In V2, object listing is also returning the key of the request. Skip it from the iterator to avoid loops.
|
||||
for (final S3Object objectSummary : current.contents()) {
|
||||
final String key = objectSummary.key();
|
||||
if( this.key.equals(key)) continue;
|
||||
final S3Path path = new S3Path(s3FileSystem, "/" + bucket, key.split("/"));
|
||||
path.setObjectSummary(objectSummary);
|
||||
listPath.add(path);
|
||||
}
|
||||
|
||||
// add all the common prefixes i.e. the directories, except iterator key
|
||||
for(final CommonPrefix prefix : current.commonPrefixes()) {
|
||||
if( prefix.prefix().equals("/") || this.key.equals(prefix.prefix())) continue;
|
||||
listPath.add(new S3Path(s3FileSystem, "/" + bucket, prefix.prefix()));
|
||||
}
|
||||
return listPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* The current #buildRequest() get all subdirectories and her content.
|
||||
* This method filter the keyChild and check if is a immediate
|
||||
* descendant of the keyParent parameter
|
||||
* @param keyParent String
|
||||
* @param keyChild String
|
||||
* @return String parsed
|
||||
* or null when the keyChild and keyParent are the same and not have to be returned
|
||||
*/
|
||||
@Deprecated
|
||||
private String getInmediateDescendent(String keyParent, String keyChild){
|
||||
|
||||
keyParent = deleteExtraPath(keyParent);
|
||||
keyChild = deleteExtraPath(keyChild);
|
||||
|
||||
final int parentLen = keyParent.length();
|
||||
final String childWithoutParent = deleteExtraPath(keyChild
|
||||
.substring(parentLen));
|
||||
|
||||
String[] parts = childWithoutParent.split("/");
|
||||
|
||||
if (parts.length > 0 && !parts[0].isEmpty()){
|
||||
return keyParent + "/" + parts[0];
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
private String deleteExtraPath(String keyChild) {
|
||||
if (keyChild.startsWith("/")){
|
||||
keyChild = keyChild.substring(1);
|
||||
}
|
||||
if (keyChild.endsWith("/")){
|
||||
keyChild = keyChild.substring(0, keyChild.length() - 1);
|
||||
}
|
||||
return keyChild;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,662 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Base64;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.Phaser;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import software.amazon.awssdk.core.exception.SdkException;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.*;
|
||||
import nextflow.cloud.aws.nio.util.ByteBufferInputStream;
|
||||
import nextflow.cloud.aws.nio.util.S3MultipartOptions;
|
||||
import nextflow.cloud.aws.nio.util.S3ObjectId;
|
||||
import nextflow.util.ThreadPoolManager;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import static java.util.Objects.requireNonNull;
|
||||
|
||||
/**
|
||||
* Parallel S3 multipart uploader. Based on the following code request
|
||||
* See https://github.com/Upplication/Amazon-S3-FileSystem-NIO2/pulls
|
||||
*
|
||||
* @author Paolo Di Tommaso
|
||||
* @author Tom Wieczorek
|
||||
*/
|
||||
|
||||
public final class S3OutputStream extends OutputStream {
|
||||
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(S3OutputStream.class);
|
||||
|
||||
/**
|
||||
* Minimum multipart chunk size 5MB
|
||||
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
|
||||
*/
|
||||
private static final int MIN_MULTIPART_UPLOAD = 5 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* Amazon S3 API implementation to use.
|
||||
*/
|
||||
private final S3Client s3;
|
||||
|
||||
/**
|
||||
* ID of the S3 object to store data into.
|
||||
*/
|
||||
private final S3ObjectId objectId;
|
||||
|
||||
/**
|
||||
* Amazon S3 storage class to apply to the newly created S3 object, if any.
|
||||
*/
|
||||
private StorageClass storageClass;
|
||||
|
||||
private ServerSideEncryption storageEncryption;
|
||||
|
||||
private String kmsKeyId;
|
||||
|
||||
private String contentType;
|
||||
|
||||
/**
|
||||
* Indicates if the stream has been closed.
|
||||
*/
|
||||
private volatile boolean closed;
|
||||
|
||||
/**
|
||||
* Indicates if the upload has been aborted
|
||||
*/
|
||||
private volatile boolean aborted;
|
||||
|
||||
/**
|
||||
* If a multipart upload is in progress, holds the ID for it, {@code null} otherwise.
|
||||
*/
|
||||
private volatile String uploadId;
|
||||
|
||||
/**
|
||||
* If a multipart upload is in progress, holds the ETags of the uploaded parts, {@code null} otherwise.
|
||||
*/
|
||||
private Queue<CompletedPart> completedParts;
|
||||
|
||||
/**
|
||||
* Holds upload request metadata
|
||||
*/
|
||||
private final S3MultipartOptions request;
|
||||
|
||||
/**
|
||||
* Instead of allocating a new buffer for each chunk, recycle them by putting
* a buffer instance into this queue when the upload process is completed
|
||||
*/
|
||||
final private Queue<ByteBuffer> bufferPool = new ConcurrentLinkedQueue<ByteBuffer>();
|
||||
|
||||
/**
|
||||
* The executor service (thread pool) which manages the upload in background
|
||||
*/
|
||||
private ExecutorService executor;
|
||||
|
||||
/**
|
||||
* The current working buffer
|
||||
*/
|
||||
private ByteBuffer buf;
|
||||
|
||||
private MessageDigest md5;
|
||||
|
||||
/**
|
||||
* Phaser object to synchronize stream termination
|
||||
*/
|
||||
private Phaser phaser;
|
||||
|
||||
/**
|
||||
* Count the number of uploaded chunks
|
||||
*/
|
||||
private int partsCount;
|
||||
|
||||
private int bufferSize;
|
||||
|
||||
private ObjectCannedACL cannedAcl;
|
||||
|
||||
private List<Tag> tags;
|
||||
|
||||
private final AtomicInteger bufferCounter = new AtomicInteger();
|
||||
|
||||
/**
 * Creates a new {@code S3OutputStream} that writes data directly into the S3 object with the given {@code objectId}.
 * No special object metadata or storage class will be attached to the object.
 *
 * @param s3       the S3 client used for the upload; must not be {@code null}
 * @param objectId the id of the target object; must not be {@code null}
 * @param request  the multipart options, which also provide the buffer size; must not be {@code null}
 * @throws NullPointerException when any argument is {@code null}
 */
public S3OutputStream(final S3Client s3, S3ObjectId objectId, S3MultipartOptions request) {
    this.s3 = requireNonNull(s3);
    this.objectId = requireNonNull(objectId);
    // validated like the other arguments, since it is dereferenced right below
    this.request = requireNonNull(request);
    this.bufferSize = request.getBufferSize();
}
|
||||
|
||||
private ByteBuffer expandBuffer(ByteBuffer byteBuffer) {
|
||||
|
||||
final float expandFactor = 2.5f;
|
||||
final int newCapacity = Math.min( (int)(byteBuffer.capacity() * expandFactor), bufferSize );
|
||||
|
||||
// cast to prevent Java 8 / Java 11 cross compile-runtime error
|
||||
// https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
|
||||
((java.nio.Buffer)byteBuffer).flip();
|
||||
ByteBuffer expanded = ByteBuffer.allocate(newCapacity);
|
||||
expanded.order(byteBuffer.order());
|
||||
expanded.put(byteBuffer);
|
||||
return expanded;
|
||||
}
|
||||
|
||||
public S3OutputStream setCannedAcl(ObjectCannedACL acl) {
|
||||
this.cannedAcl = acl;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3OutputStream setTags(List<Tag> tags) {
|
||||
this.tags = tags;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3OutputStream setStorageClass(String storageClass) {
|
||||
if( storageClass!=null )
|
||||
this.storageClass = StorageClass.fromValue(storageClass);
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3OutputStream setStorageEncryption(String storageEncryption) {
|
||||
if( storageEncryption!=null )
|
||||
this.storageEncryption = ServerSideEncryption.fromValue(storageEncryption);
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3OutputStream setKmsKeyId(String kmsKeyId) {
|
||||
this.kmsKeyId = kmsKeyId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3OutputStream setContentType(String type) {
|
||||
this.contentType = type;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A MD5 message digester
|
||||
*/
|
||||
private MessageDigest createMd5() {
|
||||
try {
|
||||
return MessageDigest.getInstance("MD5");
|
||||
}
|
||||
catch(NoSuchAlgorithmException e) {
|
||||
throw new IllegalStateException("Cannot find a MD5 algorithm provider",e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a byte into the uploader buffer. When it is full starts the upload process
|
||||
* in a asynchronous manner
|
||||
*
|
||||
* @param b The byte to be written
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void write (int b) throws IOException {
|
||||
if( closed ){
|
||||
throw new IOException("Can't write into a closed stream");
|
||||
}
|
||||
if( buf == null ) {
|
||||
buf = allocate();
|
||||
md5 = createMd5();
|
||||
}
|
||||
else if( !buf.hasRemaining() ) {
|
||||
if( buf.position() < bufferSize ) {
|
||||
buf = expandBuffer(buf);
|
||||
}
|
||||
else {
|
||||
flush();
|
||||
// create a new buffer
|
||||
buf = allocate();
|
||||
md5 = createMd5();
|
||||
}
|
||||
}
|
||||
|
||||
buf.put((byte) b);
|
||||
// update the md5 checksum
|
||||
md5.update((byte) b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush the current buffer uploading to S3 storage
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
// send out the current buffer
|
||||
if( uploadBuffer(buf, false) ) {
|
||||
// clear the current buffer
|
||||
buf = null;
|
||||
md5 = null;
|
||||
}
|
||||
}
|
||||
|
||||
private ByteBuffer allocate() {
|
||||
|
||||
if( partsCount==0 ) {
|
||||
// this class is expected to be used to upload small files
|
||||
// start with a small buffer and growth if more space if necessary
|
||||
final int initialSize = 100 * 1024;
|
||||
return ByteBuffer.allocate(initialSize);
|
||||
}
|
||||
|
||||
// try to reuse a buffer from the poll
|
||||
ByteBuffer result = bufferPool.poll();
|
||||
if( result != null ) {
|
||||
result.clear();
|
||||
}
|
||||
else {
|
||||
// allocate a new buffer
|
||||
log.debug("Allocating new buffer of {} bytes, total buffers {}", bufferSize, bufferCounter.incrementAndGet());
|
||||
result = ByteBuffer.allocate(bufferSize);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Upload the given buffer to S3 storage in a asynchronous manner.
|
||||
* NOTE: when the executor service is busy (i.e. there are any more free threads)
|
||||
* this method will block
|
||||
*
|
||||
* return: true if the buffer can be reused, false if still needs to be used
|
||||
*/
|
||||
private boolean uploadBuffer(ByteBuffer buf, boolean last) throws IOException {
|
||||
// when the buffer is empty nothing to do
|
||||
if( buf == null || buf.position()==0 ) { return false; }
|
||||
|
||||
// Intermediate uploads needs to have at least MIN bytes
|
||||
if( buf.position() < MIN_MULTIPART_UPLOAD && !last){
|
||||
return false;
|
||||
}
|
||||
|
||||
if (partsCount == 0) {
|
||||
init();
|
||||
}
|
||||
|
||||
// set the buffer in read mode and submit for upload
|
||||
executor.submit( task(buf, md5.digest(), ++partsCount) );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize multipart upload data structures
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
private void init() throws IOException {
|
||||
// get the upload id
|
||||
uploadId = initiateMultipartUpload().uploadId();
|
||||
if (uploadId == null) {
|
||||
throw new IOException("Failed to get a valid multipart upload ID from Amazon S3");
|
||||
}
|
||||
// create the executor
|
||||
executor = getOrCreateExecutor(request.getMaxThreads());
|
||||
completedParts = new LinkedBlockingQueue<>();
|
||||
phaser = new Phaser();
|
||||
phaser.register();
|
||||
log.trace("[S3 phaser] Register - Starting S3 upload: {}; chunk-size: {}; max-threads: {}", uploadId, bufferSize, request.getMaxThreads());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a {@link Runnable} task to handle the upload process
|
||||
* in background
|
||||
*
|
||||
* @param buffer The buffer to be uploaded
|
||||
* @param partIndex The index count
|
||||
* @return
|
||||
*/
|
||||
private Runnable task(final ByteBuffer buffer, final byte[] checksum, final int partIndex) {
|
||||
|
||||
phaser.register();
|
||||
log.trace("[S3 phaser] Task register");
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
uploadPart(buffer, checksum, partIndex, false);
|
||||
}
|
||||
catch (IOException e) {
|
||||
final StringWriter writer = new StringWriter();
|
||||
e.printStackTrace(new PrintWriter(writer));
|
||||
log.error("Upload: {} > Error for part: {}\nCaused by: {}", uploadId, partIndex, writer.toString());
|
||||
}
|
||||
finally {
|
||||
log.trace("[S3 phaser] Task arriveAndDeregisterphaser");
|
||||
phaser.arriveAndDeregister();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the stream uploading any remaining buffered data
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (uploadId == null) {
|
||||
if( buf != null )
|
||||
putObject(buf, md5.digest());
|
||||
else
|
||||
// this is needed when trying to upload an empty
|
||||
putObject(new ByteArrayInputStream(new byte[]{}), 0, createMd5().digest());
|
||||
}
|
||||
else {
|
||||
// -- upload remaining chunk
|
||||
if( buf != null )
|
||||
uploadBuffer(buf, true);
|
||||
|
||||
// -- shutdown upload executor and await termination
|
||||
log.trace("[S3 phaser] Close arriveAndAwaitAdvance");
|
||||
phaser.arriveAndAwaitAdvance();
|
||||
|
||||
// -- complete upload process
|
||||
completeMultipartUpload();
|
||||
}
|
||||
|
||||
closed = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts the multipart upload process
|
||||
*
|
||||
* @return An instance of {@link CreateMultipartUploadResponse}
|
||||
* @throws IOException
|
||||
*/
|
||||
private CreateMultipartUploadResponse initiateMultipartUpload() throws IOException {
|
||||
final CreateMultipartUploadRequest.Builder reqBuilder = //
|
||||
CreateMultipartUploadRequest.builder().bucket(objectId.bucket()).key(objectId.key());
|
||||
|
||||
if (storageClass != null) {
|
||||
reqBuilder.storageClass(storageClass);
|
||||
}
|
||||
|
||||
if( cannedAcl != null ) {
|
||||
reqBuilder.acl(cannedAcl);
|
||||
}
|
||||
|
||||
if( kmsKeyId !=null ) {
|
||||
reqBuilder.ssekmsKeyId(kmsKeyId);
|
||||
}
|
||||
|
||||
if( storageEncryption != null ) {
|
||||
reqBuilder.serverSideEncryption(storageEncryption);
|
||||
}
|
||||
|
||||
if( contentType != null ) {
|
||||
reqBuilder.contentType(contentType);
|
||||
}
|
||||
final CreateMultipartUploadRequest request = reqBuilder.build();
|
||||
if( log.isTraceEnabled() ) {
|
||||
log.trace("S3 initiateMultipartUpload {}", request);
|
||||
}
|
||||
|
||||
try {
|
||||
return s3.createMultipartUpload(request);
|
||||
} catch (final SdkException e) {
|
||||
throw new IOException("Failed to initiate Amazon S3 multipart upload", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload the given buffer to the S3 storage using a multipart process
|
||||
*
|
||||
* @param buf The buffer holding the data to upload
|
||||
* @param partNumber The progressive index of this chunk (1-based)
|
||||
* @param lastPart {@code true} when it is the last chunk
|
||||
* @throws IOException
|
||||
*/
|
||||
private void uploadPart( final ByteBuffer buf, final byte[] checksum, final int partNumber, final boolean lastPart ) throws IOException {
|
||||
// cast to prevent Java 8 / Java 11 cross compile-runtime error
|
||||
// https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
|
||||
((java.nio.Buffer)buf).flip();
|
||||
((java.nio.Buffer)buf).mark();
|
||||
|
||||
int attempt=0;
|
||||
boolean success=false;
|
||||
try {
|
||||
while( !success ) {
|
||||
attempt++;
|
||||
int len = buf.limit();
|
||||
try {
|
||||
log.trace("Uploading part {} with length {} attempt {} for {} ", partNumber, len, attempt, objectId);
|
||||
uploadPart( new ByteBufferInputStream(buf), len, checksum , partNumber, lastPart );
|
||||
success=true;
|
||||
}
|
||||
catch (SdkException | IOException e) {
|
||||
if( attempt == request.getMaxAttempts() )
|
||||
throw new IOException("Failed to upload multipart data to Amazon S3", e);
|
||||
|
||||
log.debug("Failed to upload part {} attempt {} for {} -- Caused by: {}", partNumber, attempt, objectId, e.getMessage());
|
||||
sleep(request.getRetrySleep());
|
||||
buf.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
finally {
|
||||
if (!success) {
|
||||
closed = true;
|
||||
abortMultipartUpload();
|
||||
}
|
||||
bufferPool.offer(buf);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void uploadPart(final InputStream content, final long contentLength, final byte[] checksum, final int partNumber, final boolean lastPart)
|
||||
throws IOException {
|
||||
|
||||
if (aborted) return;
|
||||
|
||||
final UploadPartRequest.Builder reqBuilder = UploadPartRequest.builder();
|
||||
reqBuilder.bucket(objectId.bucket());
|
||||
reqBuilder.key(objectId.key());
|
||||
reqBuilder.uploadId(uploadId);
|
||||
reqBuilder.partNumber(partNumber);
|
||||
reqBuilder.contentLength(contentLength);
|
||||
reqBuilder.contentMD5(Base64.getEncoder().encodeToString(checksum));
|
||||
|
||||
final UploadPartResponse resp = s3.uploadPart(reqBuilder.build(), RequestBody.fromInputStream(content, contentLength));
|
||||
log.trace("Uploaded part {} with length {} for {}: {}", partNumber, contentLength, objectId, resp.eTag());
|
||||
completedParts.add(CompletedPart.builder()
|
||||
.partNumber(partNumber)
|
||||
.eTag(resp.eTag())
|
||||
.build());
|
||||
|
||||
}
|
||||
|
||||
private void sleep( long millis ) {
|
||||
try {
|
||||
Thread.sleep(millis);
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
log.trace("Sleep was interrupted -- Cause: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Aborts the multipart upload process
|
||||
*/
|
||||
private synchronized void abortMultipartUpload() {
|
||||
if (aborted) return;
|
||||
|
||||
log.debug("Aborting multipart upload {} for {}", uploadId, objectId);
|
||||
try {
|
||||
s3.abortMultipartUpload(AbortMultipartUploadRequest.builder().bucket(objectId.bucket()).key(objectId.key()).uploadId(uploadId).build());
|
||||
}
|
||||
catch (final SdkException e) {
|
||||
log.warn("Failed to abort multipart upload {}: {}", uploadId, e.getMessage());
|
||||
}
|
||||
aborted = true;
|
||||
log.trace("[S3 phaser] MultipartUpload arriveAndDeregister");
|
||||
phaser.arriveAndDeregister();
|
||||
}
|
||||
|
||||
/**
|
||||
* Completes the multipart upload process
|
||||
* @throws IOException
|
||||
*/
|
||||
private void completeMultipartUpload() throws IOException {
|
||||
// if aborted upload just ignore it
|
||||
if( aborted ) return;
|
||||
|
||||
final int partCount = completedParts.size();
|
||||
log.trace("Completing upload to {} consisting of {} parts", objectId, partCount);
|
||||
|
||||
//Ensure parts are sorted by partNumber
|
||||
CompletedPart[] parts = completedParts.stream()
|
||||
.sorted(Comparator.comparingInt(CompletedPart::partNumber))
|
||||
.toArray(CompletedPart[]::new);
|
||||
try {
|
||||
final CompletedMultipartUpload completedUpload = CompletedMultipartUpload.builder()
|
||||
.parts(parts)
|
||||
.build();
|
||||
|
||||
s3.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
|
||||
.bucket(objectId.bucket())
|
||||
.key(objectId.key())
|
||||
.uploadId(uploadId)
|
||||
.multipartUpload(completedUpload)
|
||||
.build());
|
||||
} catch (final SdkException e) {
|
||||
throw new IOException("Failed to complete Amazon S3 multipart upload", e);
|
||||
}
|
||||
|
||||
log.trace("Completed upload to {} consisting of {} parts", objectId, partCount);
|
||||
|
||||
uploadId = null;
|
||||
completedParts = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the given buffer using a single-part upload process
|
||||
* @param buf
|
||||
* @throws IOException
|
||||
*/
|
||||
private void putObject(ByteBuffer buf, byte[] checksum) throws IOException {
|
||||
// cast to prevent Java 8 / Java 11 cross compile-runtime error
|
||||
// https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
|
||||
((java.nio.Buffer)buf).flip();
|
||||
putObject(new ByteBufferInputStream(buf), buf.limit(), checksum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the given buffer using a single-part upload process
|
||||
*
|
||||
* @param contentLength
|
||||
* @param content
|
||||
* @throws IOException
|
||||
*/
|
||||
private void putObject(final InputStream content, final long contentLength, byte[] checksum) throws IOException {
|
||||
final PutObjectRequest.Builder reqBuilder = PutObjectRequest.builder();
|
||||
reqBuilder.bucket(objectId.bucket());
|
||||
reqBuilder.key(objectId.key());
|
||||
reqBuilder.contentLength(contentLength);
|
||||
reqBuilder.contentMD5( Base64.getEncoder().encodeToString(checksum) );
|
||||
if( cannedAcl!=null ) {
|
||||
reqBuilder.acl(cannedAcl);
|
||||
}
|
||||
|
||||
if (storageClass != null) {
|
||||
reqBuilder.storageClass(storageClass);
|
||||
}
|
||||
|
||||
if( tags!=null && tags.size()>0 ) {
|
||||
reqBuilder.tagging(Tagging.builder().tagSet(tags).build() );
|
||||
}
|
||||
|
||||
if( kmsKeyId !=null ) {
|
||||
reqBuilder.ssekmsKeyId(kmsKeyId);
|
||||
}
|
||||
|
||||
if( storageEncryption != null ) {
|
||||
reqBuilder.serverSideEncryption( storageEncryption );
|
||||
}
|
||||
|
||||
if( contentType != null ) {
|
||||
reqBuilder.contentType(contentType);
|
||||
}
|
||||
PutObjectRequest request = reqBuilder.build();
|
||||
if( log.isTraceEnabled() ) {
|
||||
log.trace("S3 putObject {}", request);
|
||||
}
|
||||
|
||||
try {
|
||||
s3.putObject(request, RequestBody.fromInputStream(content, contentLength));
|
||||
} catch (final SdkException e) {
|
||||
throw new IOException("Failed to put data into Amazon S3 object", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Number of uploaded chunks
|
||||
*/
|
||||
int getPartsCount() {
|
||||
return partsCount;
|
||||
}
|
||||
|
||||
|
||||
/** holds a singleton executor instance */
|
||||
static private volatile ExecutorService executorSingleton;
|
||||
|
||||
/**
|
||||
* Creates a singleton executor instance.
|
||||
*
|
||||
* @param maxThreads
|
||||
* The max number of allowed threads in the executor pool.
|
||||
* NOTE: changing the size parameter after the first invocation has no effect.
|
||||
* @return The executor instance
|
||||
*/
|
||||
static synchronized ExecutorService getOrCreateExecutor(int maxThreads) {
|
||||
if( executorSingleton == null ) {
|
||||
executorSingleton = ThreadPoolManager.create("S3StreamUploader", maxThreads);
|
||||
}
|
||||
return executorSingleton;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,614 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.LinkOption;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.WatchEvent;
|
||||
import java.nio.file.WatchKey;
|
||||
import java.nio.file.WatchService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import nextflow.cloud.aws.nio.util.S3ObjectId;
|
||||
import software.amazon.awssdk.services.s3.model.S3Object;
|
||||
import software.amazon.awssdk.services.s3.model.Tag;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import nextflow.file.TagAwareFile;
|
||||
import static com.google.common.collect.Iterables.concat;
|
||||
import static com.google.common.collect.Iterables.filter;
|
||||
import static com.google.common.collect.Iterables.transform;
|
||||
import static java.lang.String.format;
|
||||
|
||||
public class S3Path implements Path, TagAwareFile {
|
||||
|
||||
public static final String PATH_SEPARATOR = "/";
|
||||
/**
|
||||
* bucket name
|
||||
*/
|
||||
private final String bucket;
|
||||
/**
|
||||
* Parts without bucket name.
|
||||
*/
|
||||
private final List<String> parts;
|
||||
/**
|
||||
* actual filesystem
|
||||
*/
|
||||
private S3FileSystem fileSystem;
|
||||
|
||||
private S3Object object;
|
||||
|
||||
private Map<String,String> tags;
|
||||
|
||||
private String contentType;
|
||||
|
||||
private String storageClass;
|
||||
|
||||
/**
 * Builds a path from a single string of the form "/{bucket}", "/{bucket}/{key}" or just
 * "{key}". A leading "/" makes the path absolute and the first segment the bucket name.
 * Examples:
 * <ul>
 * <li>"/{bucket}//{value}" is accepted, empty key segments are ignored</li>
 * <li>"//{key}" is rejected, the bucket is missing</li>
 * <li>"/" is rejected, the bucket is missing</li>
 * </ul>
 *
 * @param fileSystem The file system owning this path
 * @param path The path string
 */
public S3Path(S3FileSystem fileSystem, String path) {
    this(fileSystem, path, "");
}
|
||||
|
||||
/**
 * Build an S3Path from path segments. '/' are stripped from each segment and empty
 * segments are dropped.
 * <p>
 * When {@code first} starts with '/', the path is absolute and its first segment is the
 * bucket. A missing or empty bucket ("/", "//key") is rejected with an
 * {@link IllegalArgumentException}. The bare "/" case previously escaped as an
 * {@link IndexOutOfBoundsException} because the size check allowed a single element.
 *
 * @param fileSystem The file system owning this path
 * @param first The leading segment(s); a leading '/' marks an absolute path whose first element is the bucket
 * @param more Further directories and files, each split on '/'
 * @throws IllegalArgumentException When an absolute path has no bucket name
 */
public S3Path(S3FileSystem fileSystem, String first,
              String ... more) {

    List<String> parts = Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(first));

    // a trailing separator produces a trailing empty element: drop it
    if (first.endsWith(PATH_SEPARATOR)) {
        parts.remove(parts.size()-1);
    }

    String bucket = null;
    if (first.startsWith(PATH_SEPARATOR)) { // absolute path
        // element 0 is the empty string before the leading separator, element 1 is the bucket
        Preconditions.checkArgument(parts.size() >= 2,
                "path must start with bucket name");
        Preconditions.checkArgument(!parts.get(1).isEmpty(),
                "bucket name must be not empty");

        bucket = parts.get(1);
        parts = parts.subList(2, parts.size());
    }

    for (String part : more){
        parts.addAll(Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(part)));
    }

    this.bucket = bucket;
    this.parts = KeyParts.parse(parts);
    this.fileSystem = fileSystem;
}
|
||||
|
||||
/**
 * Builds a path from an already separated bucket and key segments. The segments still go
 * through {@code KeyParts.parse}, which strips '/' and drops empty entries.
 *
 * @param fileSystem The file system owning this path
 * @param bucket The bucket name, {@code null} for a relative path
 * @param keys The key segments
 */
private S3Path(S3FileSystem fileSystem, String bucket,
               Iterable<String> keys){
    this.fileSystem = fileSystem;
    this.bucket = bucket;
    this.parts = KeyParts.parse(keys);
}
|
||||
|
||||
|
||||
public String getBucket() {
|
||||
return bucket;
|
||||
}
|
||||
|
||||
/**
|
||||
* key for amazon without final slash.
|
||||
* <b>note:</b> the final slash need to be added to save a directory (Amazon s3 spec)
|
||||
*/
|
||||
public String getKey() {
|
||||
if (parts.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
ImmutableList.Builder<String> builder = ImmutableList
|
||||
.<String> builder().addAll(parts);
|
||||
|
||||
return Joiner.on(PATH_SEPARATOR).join(builder.build());
|
||||
}
|
||||
|
||||
public S3ObjectId toS3ObjectId() {
|
||||
return new S3ObjectId(bucket, getKey());
|
||||
}
|
||||
|
||||
@Override
|
||||
public S3FileSystem getFileSystem() {
|
||||
return this.fileSystem;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAbsolute() {
|
||||
return bucket != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getRoot() {
|
||||
if (isAbsolute()) {
|
||||
return new S3Path(fileSystem, bucket, ImmutableList.<String> of());
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getFileName() {
|
||||
if (!parts.isEmpty()) {
|
||||
return new S3Path(fileSystem, null, parts.subList(parts.size() - 1,
|
||||
parts.size()));
|
||||
}
|
||||
else {
|
||||
// bucket dont have fileName
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getParent() {
|
||||
// bucket is not present in the parts
|
||||
if (parts.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (parts.size() == 1 && (bucket == null || bucket.isEmpty())){
|
||||
return null;
|
||||
}
|
||||
|
||||
return new S3Path(fileSystem, bucket,
|
||||
parts.subList(0, parts.size() - 1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNameCount() {
|
||||
return parts.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getName(int index) {
|
||||
return new S3Path(fileSystem, null, parts.subList(index, index + 1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path subpath(int beginIndex, int endIndex) {
|
||||
return new S3Path(fileSystem, null, parts.subList(beginIndex, endIndex));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean startsWith(Path other) {
|
||||
|
||||
if (other.getNameCount() > this.getNameCount()){
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(other instanceof S3Path)){
|
||||
return false;
|
||||
}
|
||||
|
||||
S3Path path = (S3Path) other;
|
||||
|
||||
if (path.parts.size() == 0 && path.bucket == null &&
|
||||
(this.parts.size() != 0 || this.bucket != null)){
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((path.getBucket() != null && !path.getBucket().equals(this.getBucket())) ||
|
||||
(path.getBucket() == null && this.getBucket() != null)){
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < path.parts.size() ; i++){
|
||||
if (!path.parts.get(i).equals(this.parts.get(i))){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean startsWith(String path) {
|
||||
S3Path other = new S3Path(this.fileSystem, path);
|
||||
return this.startsWith(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean endsWith(Path other) {
|
||||
if (other.getNameCount() > this.getNameCount()){
|
||||
return false;
|
||||
}
|
||||
// empty
|
||||
if (other.getNameCount() == 0 &&
|
||||
this.getNameCount() != 0){
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(other instanceof S3Path)){
|
||||
return false;
|
||||
}
|
||||
|
||||
S3Path path = (S3Path) other;
|
||||
|
||||
if ((path.getBucket() != null && !path.getBucket().equals(this.getBucket())) ||
|
||||
(path.getBucket() != null && this.getBucket() == null)){
|
||||
return false;
|
||||
}
|
||||
|
||||
// check subkeys
|
||||
|
||||
int i = path.parts.size() - 1;
|
||||
int j = this.parts.size() - 1;
|
||||
for (; i >= 0 && j >= 0 ;){
|
||||
|
||||
if (!path.parts.get(i).equals(this.parts.get(j))){
|
||||
return false;
|
||||
}
|
||||
i--;
|
||||
j--;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean endsWith(String other) {
|
||||
return this.endsWith(new S3Path(this.fileSystem, other));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path normalize() {
|
||||
if( parts==null || parts.size()==0 )
|
||||
return this;
|
||||
|
||||
return new S3Path(fileSystem, bucket, normalize0(parts));
|
||||
}
|
||||
|
||||
private Iterable<String> normalize0(List<String> parts) {
|
||||
final String s0 = Path.of(String.join(PATH_SEPARATOR, parts)).normalize().toString();
|
||||
return Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(s0));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path resolve(Path other) {
|
||||
Preconditions.checkArgument(other instanceof S3Path,
|
||||
"other must be an instance of %s", S3Path.class.getName());
|
||||
|
||||
S3Path s3Path = (S3Path) other;
|
||||
|
||||
if (s3Path.isAbsolute()) {
|
||||
return s3Path;
|
||||
}
|
||||
|
||||
if (s3Path.parts.isEmpty()) { // other is relative and empty
|
||||
return this;
|
||||
}
|
||||
|
||||
return new S3Path(fileSystem, bucket, concat(parts, s3Path.parts));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path resolve(String other) {
|
||||
return resolve(new S3Path(this.getFileSystem(), other));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path resolveSibling(Path other) {
|
||||
Preconditions.checkArgument(other instanceof S3Path,
|
||||
"other must be an instance of %s", S3Path.class.getName());
|
||||
|
||||
S3Path s3Path = (S3Path) other;
|
||||
|
||||
Path parent = getParent();
|
||||
|
||||
if (parent == null || s3Path.isAbsolute()) {
|
||||
return s3Path;
|
||||
}
|
||||
|
||||
if (s3Path.parts.isEmpty()) { // other is relative and empty
|
||||
return parent;
|
||||
}
|
||||
|
||||
return new S3Path(fileSystem, bucket, concat(
|
||||
parts.subList(0, parts.size() - 1), s3Path.parts));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path resolveSibling(String other) {
|
||||
return resolveSibling(new S3Path(this.getFileSystem(), other));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path relativize(Path other) {
|
||||
Preconditions.checkArgument(other instanceof S3Path,
|
||||
"other must be an instance of %s", S3Path.class.getName());
|
||||
S3Path s3Path = (S3Path) other;
|
||||
|
||||
if (this.equals(other)) {
|
||||
return new S3Path(this.getFileSystem(), "");
|
||||
}
|
||||
|
||||
Preconditions.checkArgument(isAbsolute(),
|
||||
"Path is already relative: %s", this);
|
||||
Preconditions.checkArgument(s3Path.isAbsolute(),
|
||||
"Cannot relativize against a relative path: %s", s3Path);
|
||||
Preconditions.checkArgument(bucket.equals(s3Path.getBucket()),
|
||||
"Cannot relativize paths with different buckets: '%s', '%s'",
|
||||
this, other);
|
||||
|
||||
Preconditions.checkArgument(parts.size() <= s3Path.parts.size(),
|
||||
"Cannot relativize against a parent path: '%s', '%s'",
|
||||
this, other);
|
||||
|
||||
|
||||
int startPart = 0;
|
||||
for (int i = 0; i <this.parts.size() ; i++){
|
||||
if (this.parts.get(i).equals(s3Path.parts.get(i))){
|
||||
startPart++;
|
||||
}
|
||||
}
|
||||
|
||||
List<String> resultParts = new ArrayList<>();
|
||||
for (int i = startPart; i < s3Path.parts.size(); i++){
|
||||
resultParts.add(s3Path.parts.get(i));
|
||||
}
|
||||
|
||||
return new S3Path(fileSystem, null, resultParts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public URI toUri() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("s3://");
|
||||
if (fileSystem.getEndpoint() != null) {
|
||||
builder.append(fileSystem.getEndpoint());
|
||||
}
|
||||
builder.append("/");
|
||||
builder.append(bucket);
|
||||
builder.append(PATH_SEPARATOR);
|
||||
builder.append(Joiner.on(PATH_SEPARATOR).join(parts));
|
||||
return URI.create(builder.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path toAbsolutePath() {
|
||||
if (isAbsolute()) {
|
||||
return this;
|
||||
}
|
||||
|
||||
throw new IllegalStateException(format(
|
||||
"Relative path cannot be made absolute: %s", this));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path toRealPath(LinkOption... options) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public File toFile() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events,
|
||||
WatchEvent.Modifier... modifiers) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public WatchKey register(WatchService watcher, WatchEvent.Kind<?>... events)
|
||||
throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Path> iterator() {
|
||||
ImmutableList.Builder<Path> builder = ImmutableList.builder();
|
||||
|
||||
for (Iterator<String> iterator = parts.iterator(); iterator.hasNext();) {
|
||||
String part = iterator.next();
|
||||
builder.add(new S3Path(fileSystem, null, ImmutableList.of(part)));
|
||||
}
|
||||
|
||||
return builder.build().iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Path other) {
|
||||
return toString().compareTo(other.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
if (isAbsolute()) {
|
||||
builder.append(PATH_SEPARATOR);
|
||||
builder.append(bucket);
|
||||
builder.append(PATH_SEPARATOR);
|
||||
}
|
||||
|
||||
builder.append(Joiner.on(PATH_SEPARATOR).join(parts));
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
S3Path paths = (S3Path) o;
|
||||
|
||||
if (bucket != null ? !bucket.equals(paths.bucket)
|
||||
: paths.bucket != null) {
|
||||
return false;
|
||||
}
|
||||
if (!parts.equals(paths.parts)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = bucket != null ? bucket.hashCode() : 0;
|
||||
result = 31 * result + parts.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the cached {@link S3Object} instance if this path has been created
|
||||
* while iterating a directory structures by the {@link S3Iterator}.
|
||||
* <br>
|
||||
* After calling this method the cached object is reset, so any following method invocation will return {@code null}.
|
||||
* This is necessary to discard the object meta-data and force to reload file attributes when required.
|
||||
*
|
||||
* @return The cached {@link S3Object} for this path if any.
|
||||
*/
|
||||
public S3Object fetchObject() {
|
||||
S3Object result = object;
|
||||
object = null;
|
||||
return result;
|
||||
}
|
||||
|
||||
// note: package scope to limit the access to this setter
|
||||
void setObjectSummary(S3Object objectSummary) {
|
||||
this.object = objectSummary;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTags(Map<String,String> tags) {
|
||||
this.tags = tags;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentType(String type) {
|
||||
this.contentType = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setStorageClass(String storageClass) {
|
||||
this.storageClass = storageClass;
|
||||
}
|
||||
|
||||
public List<Tag> getTagsList() {
|
||||
// nothing found, just return
|
||||
if( tags==null )
|
||||
return Collections.emptyList();
|
||||
// create a list of Tag out of the Map
|
||||
List<Tag> result = new ArrayList<>();
|
||||
for( Map.Entry<String,String> entry : tags.entrySet()) {
|
||||
result.add( Tag.builder().key(entry.getKey()).value(entry.getValue()).build() );
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public String getContentType() {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
public String getStorageClass() {
|
||||
return storageClass;
|
||||
}
|
||||
|
||||
// ~ helpers methods
|
||||
|
||||
private static Function<String, String> strip(final String ... strs) {
|
||||
return new Function<String, String>() {
|
||||
public String apply(String input) {
|
||||
String res = input;
|
||||
for (String str : strs) {
|
||||
res = res.replace(str, "");
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static Predicate<String> notEmpty() {
|
||||
return new Predicate<String>() {
|
||||
@Override
|
||||
public boolean apply(@Nullable String input) {
|
||||
return input != null && !input.isEmpty();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* delete redundant "/" and empty parts
|
||||
*/
|
||||
private abstract static class KeyParts {
|
||||
|
||||
private static ImmutableList<String> parse(List<String> parts) {
|
||||
return ImmutableList.copyOf(filter(transform(parts, strip("/")), notEmpty()));
|
||||
}
|
||||
|
||||
private static ImmutableList<String> parse(Iterable<String> parts) {
|
||||
return ImmutableList.copyOf(filter(transform(parts, strip("/")), notEmpty()));
|
||||
}
|
||||
}
|
||||
|
||||
public static String bucketName(URI uri) {
|
||||
final String path = uri.getPath();
|
||||
if( path==null || !path.startsWith("/") )
|
||||
throw new IllegalArgumentException("Invalid S3 path: " + uri);
|
||||
final String[] parts = path.split("/");
|
||||
// note the element 0 contains the slash char
|
||||
return parts.length>1 ? parts[1] : null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* Hold a buffer for transfer a remote object chunk
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@Deprecated
|
||||
public class ChunkBuffer implements Comparable<ChunkBuffer> {
|
||||
|
||||
private static final int BUFFER_SIZE = 8192;
|
||||
|
||||
private final ByteBuffer target;
|
||||
|
||||
private final ChunkBufferFactory owner;
|
||||
|
||||
private final int index;
|
||||
|
||||
ChunkBuffer(ChunkBufferFactory owner, int capacity, int index) {
|
||||
this.owner = owner;
|
||||
this.target = ByteBuffer.allocateDirect(capacity);
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
int getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
int getByte() {
|
||||
return target.get() & 0xFF;
|
||||
}
|
||||
|
||||
void writeByte(int ch) {
|
||||
target.put((byte)ch);
|
||||
}
|
||||
|
||||
void fill(InputStream stream) throws IOException {
|
||||
int n;
|
||||
byte[] b = new byte[BUFFER_SIZE];
|
||||
while ((n = stream.read(b)) != -1 ) {
|
||||
target.put(b, 0, n);
|
||||
}
|
||||
}
|
||||
|
||||
void makeReadable() {
|
||||
// cast to prevent Java 8 / Java 11 cross compile-runtime error
|
||||
// https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
|
||||
((java.nio.Buffer)target).flip();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
// cast to prevent Java 8 / Java 11 cross compile-runtime error
|
||||
// https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
|
||||
((java.nio.Buffer)target).clear();
|
||||
}
|
||||
|
||||
int getBytes( byte[] buff, int off, int len ) {
|
||||
int c=0;
|
||||
int i=off;
|
||||
while( c<len && target.hasRemaining() ) {
|
||||
c++;
|
||||
buff[i++] = target.get();
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
boolean hasRemaining() {
|
||||
return target.hasRemaining();
|
||||
}
|
||||
|
||||
public void release() {
|
||||
owner.giveBack(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ChunkBuffer other) {
|
||||
return Integer.compare(index, other.index);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng;
|
||||
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Model a buffer for download chunk
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
public class ChunkBufferFactory {
|
||||
|
||||
final Logger log = LoggerFactory.getLogger(ChunkBufferFactory.class);
|
||||
|
||||
final private BlockingQueue<ChunkBuffer> pool;
|
||||
|
||||
final private AtomicInteger count;
|
||||
|
||||
private final int chunkSize;
|
||||
|
||||
private final int capacity;
|
||||
|
||||
public ChunkBufferFactory(int chunkSize, int capacity) {
|
||||
this.chunkSize = chunkSize;
|
||||
this.capacity = capacity;
|
||||
this.pool = new ArrayBlockingQueue<>(capacity);
|
||||
this.count = new AtomicInteger();
|
||||
}
|
||||
|
||||
|
||||
public ChunkBuffer create() throws InterruptedException {
|
||||
ChunkBuffer result = pool.poll(100, TimeUnit.MILLISECONDS);
|
||||
if( result != null ) {
|
||||
result.clear();
|
||||
return result;
|
||||
}
|
||||
|
||||
// add logistic delay to slow down the allocation of new buffer
|
||||
// when the request approach or exceed the max capacity
|
||||
final int indx = count.getAndIncrement();
|
||||
if( log.isTraceEnabled() )
|
||||
log.trace("Creating a new buffer index={}; capacity={}", indx, capacity);
|
||||
return new ChunkBuffer(this, chunkSize, indx);
|
||||
}
|
||||
|
||||
void giveBack(ChunkBuffer buffer) {
|
||||
if( pool.offer(buffer) ) {
|
||||
if( log.isTraceEnabled() )
|
||||
log.trace("Returning buffer {} to pool size={}", buffer.getIndex(), pool.size());
|
||||
}
|
||||
else {
|
||||
int cc = count.decrementAndGet();
|
||||
if( log.isTraceEnabled() )
|
||||
log.trace("Returning buffer index={} for GC; pool size={}; count={}", buffer.getIndex(), pool.size(), cc);
|
||||
}
|
||||
}
|
||||
|
||||
int getPoolSize() { return pool.size(); }
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import nextflow.util.Duration;
|
||||
import nextflow.util.MemoryUnit;
|
||||
|
||||
/**
|
||||
* Model S3 download options
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
public class DownloadOpts {
|
||||
|
||||
final private boolean parallelEnabled;
|
||||
private final int queueMaxSize;
|
||||
private final int numWorkers;
|
||||
private final MemoryUnit chunkSize;
|
||||
private final MemoryUnit bufferMaxSize;
|
||||
private final int maxAttempts;
|
||||
private final Duration maxDelay;
|
||||
|
||||
DownloadOpts() {
|
||||
this(new Properties(), Collections.emptyMap());
|
||||
}
|
||||
|
||||
DownloadOpts(Map opts) {
|
||||
this(props(opts), Collections.emptyMap());
|
||||
}
|
||||
|
||||
static private Properties props(Map opts) {
|
||||
Properties result = new Properties();
|
||||
result.putAll(opts);
|
||||
return result;
|
||||
}
|
||||
|
||||
DownloadOpts(Properties props, Map<String,String> env) {
|
||||
this.parallelEnabled = props.containsKey("download_parallel")
|
||||
? Boolean.parseBoolean(props.getProperty("download_parallel")) : (env.containsKey("NXF_S3_DOWNLOAD_PARALLEL") ? Boolean.parseBoolean(env.get("NXF_S3_DOWNLOAD_PARALLEL")) : false);
|
||||
|
||||
this.queueMaxSize = props.containsKey("download_queue_max_size")
|
||||
? Integer.parseInt(props.getProperty("download_queue_max_size")) : ( env.containsKey("NXF_S3_DOWNLOAD_QUEUE_SIZE") ? Integer.parseInt(env.get("NXF_S3_DOWNLOAD_QUEUE_SIZE")) : 10_000 );
|
||||
|
||||
this.numWorkers = props.containsKey("download_num_workers")
|
||||
? Integer.parseInt(props.getProperty("download_num_workers")) : ( env.containsKey("NXF_S3_DOWNLOAD_NUM_WORKERS") ? Integer.parseInt(env.get("NXF_S3_DOWNLOAD_NUM_WORKERS")) : 10 );
|
||||
|
||||
this.chunkSize = props.containsKey("download_chunk_size")
|
||||
? MemoryUnit.of(props.getProperty("download_chunk_size")) : ( env.containsKey("NXF_S3_DOWNLOAD_CHUNK_SIZE") ? MemoryUnit.of(env.get("NXF_S3_DOWNLOAD_CHUNK_SIZE")) : MemoryUnit.of("10 MB") );
|
||||
|
||||
this.bufferMaxSize = props.containsKey("download_buffer_max_size")
|
||||
? MemoryUnit.of(props.getProperty("download_buffer_max_size")) : ( env.containsKey("NXF_S3_DOWNLOAD_BUFFER_MAX_MEM") ? MemoryUnit.of(env.get("NXF_S3_DOWNLOAD_BUFFER_MAX_MEM")) : MemoryUnit.of("1 GB") );
|
||||
|
||||
this.maxAttempts = props.containsKey("download_max_attempts")
|
||||
? Integer.parseInt(props.getProperty("download_max_attempts")) : ( env.containsKey("NXF_S3_DOWNLOAD_MAX_ATTEMPTS") ? Integer.parseInt(env.get("NXF_S3_DOWNLOAD_MAX_ATTEMPTS")) : 5 );
|
||||
|
||||
this.maxDelay = props.containsKey("download_max_delay")
|
||||
? Duration.of(props.getProperty("download_max_delay")) : ( env.containsKey("NXF_S3_DOWNLOAD_MAX_DELAY") ? Duration.of(env.get("NXF_S3_DOWNLOAD_MAX_DELAY")) : Duration.of("90s") );
|
||||
|
||||
}
|
||||
|
||||
static public DownloadOpts from(Properties props) {
|
||||
return from(props, System.getenv());
|
||||
}
|
||||
|
||||
static public DownloadOpts from(Properties props, Map<String,String> env) {
|
||||
return new DownloadOpts(props, env);
|
||||
}
|
||||
|
||||
public boolean parallelEnabled() { return parallelEnabled; }
|
||||
|
||||
@Deprecated public int queueMaxSize() { return queueMaxSize; }
|
||||
|
||||
public MemoryUnit chunkSizeMem() { return chunkSize; }
|
||||
|
||||
public int chunkSize() { return (int)chunkSize.toBytes(); }
|
||||
|
||||
public MemoryUnit bufferMaxSize() { return bufferMaxSize; }
|
||||
|
||||
public int numWorkers() { return numWorkers; }
|
||||
|
||||
public long maxDelayMillis() {
|
||||
return maxDelay.getMillis();
|
||||
}
|
||||
|
||||
public int maxAttempts() {
|
||||
return maxAttempts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("workers=%s; chunkSize=%s; queueSize=%s; max-mem=%s; maxAttempts=%s; maxDelay=%s", numWorkers, chunkSize, queueMaxSize, bufferMaxSize, maxAttempts, maxDelay);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
/**
|
||||
* Implements an input stream emitting a collection of futures {@link ChunkBuffer}
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
public class FutureInputStream extends InputStream {
|
||||
|
||||
private final Iterator<Future<ChunkBuffer>> futures;
|
||||
private ChunkBuffer buffer;
|
||||
|
||||
FutureInputStream(Iterator<Future<ChunkBuffer>> futures) {
|
||||
this.futures = futures;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
|
||||
if( (buffer == null || !buffer.hasRemaining()) ) {
|
||||
freeBuffer();
|
||||
if( futures.hasNext() ) {
|
||||
buffer = nextBuffer();
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return buffer.getByte();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
|
||||
if( (buffer == null || !buffer.hasRemaining()) ) {
|
||||
freeBuffer();
|
||||
if( futures.hasNext() ) {
|
||||
buffer = nextBuffer();
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return buffer.getBytes(b, off, len);
|
||||
}
|
||||
|
||||
private ChunkBuffer nextBuffer() throws IOException {
|
||||
try {
|
||||
return futures.next().get();
|
||||
}
|
||||
catch (ExecutionException e) {
|
||||
throw new IOException("Failed to acquire stream chunk", e);
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
throw new InterruptedIOException();
|
||||
}
|
||||
}
|
||||
|
||||
private void freeBuffer() {
|
||||
if( buffer!=null ) {
|
||||
buffer.release();
|
||||
buffer=null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
freeBuffer();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
 * Implements an iterator that progressively submits a collection of tasks to the
 * specified executor and iterates over the responses returned as {@link Future}.
 *
 * Up to {@code initialSize} tasks are submitted on creation; afterwards one
 * more task is submitted each time a future is consumed. The futures are
 * returned in the same order as the requests.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 * @author Jordi Deu-Pons <jordi@seqera.io>
 */
public class FutureIterator<REQ,RESP> implements Iterator<Future<RESP>> {

    final private ExecutorService executor;
    final private Iterator<REQ> parts;
    final private Queue<Future<RESP>> futures = new LinkedList<>();
    final private Function<REQ, RESP> task;
    final private int initialSize;

    /**
     * @param parts the requests to process
     * @param task the function computing the response of a request
     * @param executor the executor running the tasks
     * @param initialSize the maximum number of tasks submitted on creation
     */
    FutureIterator(List<REQ> parts, Function<REQ, RESP> task, ExecutorService executor, int initialSize) {
        this.parts = parts.iterator();
        this.task = task;
        this.executor = executor;
        this.initialSize = initialSize;

        init();
    }

    /** Submits the first tasks, up to {@code initialSize}. */
    private void init() {
        int submitted = 0;
        while (parts.hasNext() && submitted++ < initialSize ) {
            submitNext();
        }
    }

    /**
     * Submits the task for the next request and queues its future.
     * When the submission fails all the pending tasks are cancelled.
     */
    private void submitNext() {
        // note: making `parts.next()` inline in the lambda causes to delay
        // the evaluate in a separate thread causing concurrency problems
        final REQ req = parts.next();
        try {
            futures.add(executor.submit( () -> task.apply(req) ));
        }
        catch (RuntimeException | Error e) {
            // in case of error cancel all pending tasks
            for( Future<RESP> it : futures ) {
                it.cancel(true);
            }
            throw e;
        }
    }

    @Override
    public boolean hasNext() {
        return !futures.isEmpty() || parts.hasNext();
    }

    /**
     * @return the future of the next response
     * @throws java.util.NoSuchElementException when all the futures have been consumed
     */
    @Override
    public Future<RESP> next() {
        if( !hasNext() )
            throw new java.util.NoSuchElementException("No more tasks to iterate");

        // keep busy the download workers adding a new chunk
        // to download each time one is consumed
        if( parts.hasNext() ) {
            submitNext();
        }
        return futures.poll();
    }
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
/**
|
||||
* @author Paolo Di Tommaso paolo.ditommaso@gmail.com
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
 * An {@code InputStream} adaptor which reads data from a {@code ByteBuffer}.
 * Reading from the stream advances the position of the underlying buffer.
 *
 * See http://stackoverflow.com/a/6603018/395921
 *
 * @author Paolo Di Tommaso paolo.ditommaso@gmail.com
 */
public class ByteBufferInputStream extends InputStream {

    ByteBuffer buf;

    /**
     * @param buf the buffer to read from, starting at its current position
     */
    public ByteBufferInputStream(ByteBuffer buf) {
        this.buf = buf;
    }

    /**
     * @return the next byte as a value in the range 0-255, or {@code -1}
     *         when the buffer has no remaining bytes
     */
    @Override
    public int read() throws IOException {
        if (!buf.hasRemaining()) {
            return -1;
        }
        return buf.get() & 0xFF;
    }

    /**
     * Copies up to {@code len} bytes of the buffer into the given array.
     *
     * @return the number of bytes copied, {@code 0} when {@code len} is zero,
     *         or {@code -1} when the buffer has no remaining bytes
     */
    @Override
    public int read(byte[] bytes, int off, int len) throws IOException {
        // InputStream contract: a zero length request returns 0, even at the end of the data
        if (len == 0) {
            return 0;
        }
        if (!buf.hasRemaining()) {
            return -1;
        }

        final int count = Math.min(len, buf.remaining());
        buf.get(bytes, off, count);
        return count;
    }

    /**
     * @return the number of bytes left in the buffer
     */
    @Override
    public int available() throws IOException {
        return buf.remaining();
    }
}
|
||||
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.Semaphore;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import software.amazon.awssdk.transfer.s3.S3TransferManager;
|
||||
import software.amazon.awssdk.transfer.s3.model.Copy;
|
||||
import software.amazon.awssdk.transfer.s3.model.CopyRequest;
|
||||
import software.amazon.awssdk.transfer.s3.model.DirectoryUpload;
|
||||
import software.amazon.awssdk.transfer.s3.model.DownloadFileRequest;
|
||||
import software.amazon.awssdk.transfer.s3.model.FileDownload;
|
||||
import software.amazon.awssdk.transfer.s3.model.FileUpload;
|
||||
import software.amazon.awssdk.transfer.s3.model.UploadDirectoryRequest;
|
||||
import software.amazon.awssdk.transfer.s3.model.UploadFileRequest;
|
||||
|
||||
import static nextflow.cloud.aws.config.AwsS3Config.*;
|
||||
|
||||
/**
|
||||
* Extends the S3 Transfer Manager with semaphores to limit concurrent
|
||||
* transfers based on available resources.
|
||||
*
|
||||
* Copies and uploads are limited based on the `maxConnections` setting.
|
||||
*
|
||||
* Downloads are limited based on the `maxDownloadHeapMemory` setting. The
|
||||
* CRT client allocates a buffer of 10 * part size for each transfer by default.
|
||||
*
|
||||
* @see https://github.com/aws/aws-sdk-java-v2/issues/6323
|
||||
*
|
||||
* @author Jorge Ejarque (jorge.ejarque@seqera.io)
|
||||
*/
|
||||
public class ExtendedS3TransferManager {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ExtendedS3TransferManager.class);
|
||||
|
||||
private S3TransferManager transferManager;
|
||||
private Semaphore semaphore;
|
||||
private long partSize;
|
||||
private int downloadPermits;
|
||||
private Semaphore downloadSemaphore;
|
||||
|
||||
public ExtendedS3TransferManager( S3TransferManager transferManager, Properties props){
|
||||
this.transferManager = transferManager;
|
||||
setDefaultSemaphore(props);
|
||||
setDownloadSemaphore(props);
|
||||
}
|
||||
|
||||
private void setDefaultSemaphore(Properties props) {
|
||||
int permits = 100;
|
||||
if( props.containsKey("max_connections")) {
|
||||
permits = Integer.parseInt(props.getProperty("max_connections"));
|
||||
}
|
||||
this.semaphore = new Semaphore(permits);
|
||||
}
|
||||
|
||||
private void setDownloadSemaphore(Properties props) {
|
||||
long maxBufferSize = DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE;
|
||||
if( props.containsKey("max_download_heap_memory")) {
|
||||
log.trace("AWS client config - max_download_heap_memory: {}", props.getProperty("max_download_heap_memory"));
|
||||
maxBufferSize = Long.parseLong(props.getProperty("max_download_heap_memory"));
|
||||
}
|
||||
|
||||
this.partSize = DEFAULT_PART_SIZE;
|
||||
if( props.containsKey("minimum_part_size")) {
|
||||
log.trace("AWS client config - minimum_part_size: {}", props.getProperty("minimum_part_size"));
|
||||
this.partSize = Long.parseLong(props.getProperty("minimum_part_size"));
|
||||
}
|
||||
|
||||
this.downloadPermits = (int) Math.floor((double) maxBufferSize / partSize);
|
||||
this.downloadSemaphore = new Semaphore(downloadPermits);
|
||||
}
|
||||
|
||||
public long getPartSize() {
|
||||
return partSize;
|
||||
}
|
||||
|
||||
public int getDownloadPermits() {
|
||||
return downloadPermits;
|
||||
}
|
||||
|
||||
public FileDownload downloadFile(DownloadFileRequest request, long size) throws InterruptedException {
|
||||
int parts = estimateParts(size);
|
||||
FileDownload fileDownload;
|
||||
downloadSemaphore.acquire(parts);
|
||||
try {
|
||||
fileDownload = transferManager.downloadFile(request);
|
||||
} catch (Throwable e) {
|
||||
// Release semaphore when runtime exception during the downloadFile submission
|
||||
downloadSemaphore.release(parts);
|
||||
throw e;
|
||||
}
|
||||
// Ensure permits are always released after completion
|
||||
fileDownload
|
||||
.completionFuture()
|
||||
.whenComplete((result, error) -> downloadSemaphore.release(parts));
|
||||
return fileDownload;
|
||||
}
|
||||
|
||||
protected int estimateParts(long size) {
|
||||
if (size <= 0)
|
||||
return 1;
|
||||
int parts = (int) Math.ceil((double) size / partSize);
|
||||
return Math.min(parts, DEFAULT_INIT_BUFFER_PARTS);
|
||||
}
|
||||
|
||||
public FileUpload uploadFile(UploadFileRequest request) throws InterruptedException {
|
||||
FileUpload fileUpload;
|
||||
semaphore.acquire();
|
||||
try {
|
||||
fileUpload = transferManager.uploadFile(request);
|
||||
} catch (Throwable e) {
|
||||
semaphore.release();
|
||||
throw e;
|
||||
}
|
||||
fileUpload
|
||||
.completionFuture()
|
||||
.whenComplete((result, error) -> semaphore.release());
|
||||
return fileUpload;
|
||||
}
|
||||
|
||||
public DirectoryUpload uploadDirectory(UploadDirectoryRequest request) throws InterruptedException {
|
||||
DirectoryUpload directoryUpload;
|
||||
semaphore.acquire();
|
||||
try {
|
||||
directoryUpload = transferManager.uploadDirectory(request);
|
||||
} catch (Throwable e) {
|
||||
semaphore.release();
|
||||
throw e;
|
||||
}
|
||||
directoryUpload
|
||||
.completionFuture()
|
||||
.whenComplete((result, error) -> semaphore.release());
|
||||
return directoryUpload;
|
||||
}
|
||||
|
||||
public Copy copy(CopyRequest request) throws InterruptedException {
|
||||
Copy copy;
|
||||
semaphore.acquire();
|
||||
try {
|
||||
copy = transferManager.copy(request);
|
||||
} catch (Throwable e) {
|
||||
semaphore.release();
|
||||
throw e;
|
||||
}
|
||||
copy
|
||||
.completionFuture()
|
||||
.whenComplete((result, error) -> semaphore.release());
|
||||
return copy;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * Utilities for streams
 */
public abstract class IOUtils {

    /**
     * Reads the stream until its end and returns the bytes read.
     * The stream is not closed.
     *
     * @param is the stream to consume
     * @return the whole content of the stream
     * @throws IOException when reading from the stream fails
     */
    public static byte[] toByteArray(InputStream is) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        final byte[] chunk = new byte[16384];

        for( int count = is.read(chunk, 0, chunk.length); count != -1; count = is.read(chunk, 0, chunk.length) ) {
            sink.write(chunk, 0, count);
        }

        sink.flush();
        return sink.toByteArray();
    }
}
|
||||
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import software.amazon.awssdk.services.s3.crt.S3CrtConnectionHealthConfiguration;
|
||||
import software.amazon.awssdk.services.s3.crt.S3CrtProxyConfiguration;
|
||||
import software.amazon.awssdk.services.s3.crt.S3CrtHttpConfiguration;
|
||||
import software.amazon.awssdk.services.s3.crt.S3CrtRetryConfiguration;
|
||||
import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Class to convert Amazon properties in S3 asynchronous client configuration
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
public class S3AsyncClientConfiguration extends S3ClientConfiguration {
|
||||
|
||||
private static final long DEFAULT_SOCKET_TIMEOUT_MS = 30_000L;
|
||||
|
||||
private S3CrtHttpConfiguration.Builder crtHttpConfiguration;
|
||||
private MultipartConfiguration.Builder multiPartBuilder;
|
||||
private S3CrtRetryConfiguration crtRetryConfiguration;
|
||||
private Integer maxConcurrency ;
|
||||
private Double targetThroughputInGbps;
|
||||
private Long maxNativeMemoryInBytes;
|
||||
|
||||
private S3CrtHttpConfiguration.Builder crtHttpConfiguration() {
|
||||
if( this.crtHttpConfiguration == null)
|
||||
this.crtHttpConfiguration = S3CrtHttpConfiguration.builder();
|
||||
return this.crtHttpConfiguration;
|
||||
}
|
||||
|
||||
private MultipartConfiguration.Builder multipartBuilder() {
|
||||
if( this.multiPartBuilder == null)
|
||||
this.multiPartBuilder = MultipartConfiguration.builder();
|
||||
return this.multiPartBuilder;
|
||||
}
|
||||
|
||||
public S3CrtHttpConfiguration getCrtHttpConfiguration() {
|
||||
if ( this.crtHttpConfiguration == null )
|
||||
return null;
|
||||
return this.crtHttpConfiguration.build();
|
||||
}
|
||||
|
||||
public MultipartConfiguration getMultipartConfiguration() {
|
||||
if( this.multiPartBuilder == null )
|
||||
return null;
|
||||
return this.multiPartBuilder.build();
|
||||
}
|
||||
|
||||
private S3AsyncClientConfiguration() {
|
||||
super();
|
||||
}
|
||||
|
||||
public S3CrtRetryConfiguration getCrtRetryConfiguration() {
|
||||
return this.crtRetryConfiguration;
|
||||
}
|
||||
|
||||
public Integer getMaxConcurrency() {
|
||||
return this.maxConcurrency;
|
||||
}
|
||||
|
||||
public Double getTargetThroughputInGbps() {
|
||||
return this.targetThroughputInGbps;
|
||||
}
|
||||
|
||||
public Long getMaxNativeMemoryInBytes() {
|
||||
return this.maxNativeMemoryInBytes;
|
||||
}
|
||||
|
||||
private void setAsyncConfiguration(Properties props) {
|
||||
|
||||
if( props.containsKey("max_error_retry")) {
|
||||
log.trace("AWS client config - max_error_retry: {}", props.getProperty("max_error_retry"));
|
||||
this.crtRetryConfiguration = S3CrtRetryConfiguration.builder().numRetries(Integer.parseInt(props.getProperty("max_error_retry"))).build();
|
||||
}
|
||||
|
||||
if( props.containsKey("max_concurrency")) {
|
||||
log.trace("AWS client config - max_concurrency: {}", props.getProperty("max_concurrency"));
|
||||
this.maxConcurrency = Integer.parseInt(props.getProperty("max_concurrency"));
|
||||
}
|
||||
|
||||
if( props.containsKey("target_throughput_in_gbps")) {
|
||||
log.trace("AWS client config - target_throughput_in_gbps: {}", props.getProperty("target_throughput_in_gbps"));
|
||||
this.targetThroughputInGbps = Double.parseDouble(props.getProperty("target_throughput_in_gbps"));
|
||||
}
|
||||
|
||||
if( props.containsKey("max_native_memory")) {
|
||||
log.trace("AWS client config - max_native_memory: {}", props.getProperty("max_native_memory"));
|
||||
this.maxNativeMemoryInBytes = Long.parseLong(props.getProperty("max_native_memory"));
|
||||
}
|
||||
|
||||
if( props.containsKey("minimum_part_size")) {
|
||||
log.trace("AWS client config - minimum_part_size: {}", props.getProperty("minimum_part_size"));
|
||||
multipartBuilder().minimumPartSizeInBytes(Long.parseLong(props.getProperty("minimum_part_size")));
|
||||
}
|
||||
|
||||
if( props.containsKey("multipart_threshold")) {
|
||||
log.trace("AWS client config - multipart_threshold: {}", props.getProperty("multipart_threshold"));
|
||||
multipartBuilder().thresholdInBytes(Long.parseLong(props.getProperty("multipart_threshold")));
|
||||
}
|
||||
|
||||
if( props.containsKey("connection_timeout") ) {
|
||||
log.trace("AWS client config - connection_timeout: {}", props.getProperty("connection_timeout"));
|
||||
crtHttpConfiguration().connectionTimeout(Duration.ofMillis(Long.parseLong(props.getProperty("connection_timeout"))));
|
||||
}
|
||||
|
||||
final long socketTimeoutMs = props.containsKey("socket_timeout")
|
||||
? Long.parseLong(props.getProperty("socket_timeout"))
|
||||
: DEFAULT_SOCKET_TIMEOUT_MS;
|
||||
log.trace("AWS client config - socket_timeout: {} (using CRT health configuration with minimum throughput 1bps)", socketTimeoutMs);
|
||||
crtHttpConfiguration().connectionHealthConfiguration(
|
||||
S3CrtConnectionHealthConfiguration.builder()
|
||||
.minimumThroughputInBps(1L)
|
||||
.minimumThroughputTimeout(Duration.ofMillis(socketTimeoutMs))
|
||||
.build()
|
||||
);
|
||||
|
||||
if( props.containsKey("proxy_host")) {
|
||||
final String host = props.getProperty("proxy_host");
|
||||
final S3CrtProxyConfiguration.Builder crtProxyConfig = S3CrtProxyConfiguration.builder();
|
||||
log.trace("AWS client config - proxy host {}", host);
|
||||
crtProxyConfig.host(host);
|
||||
if (props.containsKey("proxy_port")) {
|
||||
crtProxyConfig.port(Integer.parseInt(props.getProperty("proxy_port")));
|
||||
}
|
||||
if (props.containsKey("proxy_username")) {
|
||||
crtProxyConfig.username(props.getProperty("proxy_username"));
|
||||
}
|
||||
if (props.containsKey("proxy_password")) {
|
||||
crtProxyConfig.password(props.getProperty("proxy_password"));
|
||||
}
|
||||
if (props.containsKey("proxy_scheme")) {
|
||||
crtProxyConfig.scheme(props.getProperty("proxy_scheme"));
|
||||
}
|
||||
if (props.containsKey("proxy_domain")) {
|
||||
log.warn("AWS client config 'proxy_domain' doesn't exist in AWS SDK V2 Async Client");
|
||||
}
|
||||
if (props.containsKey("proxy_workstation")) {
|
||||
log.warn("AWS client config 'proxy_workstation' doesn't exist in AWS SDK V2 Async Client");
|
||||
}
|
||||
crtHttpConfiguration().proxyConfiguration(crtProxyConfig.build());
|
||||
}
|
||||
}
|
||||
|
||||
public static S3AsyncClientConfiguration create(Properties props) {
|
||||
S3AsyncClientConfiguration config = new S3AsyncClientConfiguration();
|
||||
if( props != null ) {
|
||||
config.setClientOverrideConfiguration(props);
|
||||
config.setAsyncConfiguration(props);
|
||||
}
|
||||
return config;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import software.amazon.awssdk.auth.signer.Aws4Signer;
|
||||
import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
|
||||
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
|
||||
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
|
||||
import software.amazon.awssdk.core.signer.Signer;
|
||||
import software.amazon.awssdk.retries.StandardRetryStrategy;
|
||||
import software.amazon.awssdk.utils.ClassLoaderHelper;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Class to convert Amazon properties in S3 client override configuration
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
public class S3ClientConfiguration {
|
||||
|
||||
protected static final Logger log = LoggerFactory.getLogger(S3ClientConfiguration.class);
|
||||
|
||||
private ClientOverrideConfiguration.Builder cocBuilder;
|
||||
|
||||
private ClientOverrideConfiguration.Builder cocBuilder() {
|
||||
if( this.cocBuilder == null )
|
||||
this.cocBuilder = ClientOverrideConfiguration.builder();
|
||||
return this.cocBuilder;
|
||||
}
|
||||
|
||||
public ClientOverrideConfiguration getClientOverrideConfiguration() {
|
||||
if( cocBuilder == null )
|
||||
return null;
|
||||
return cocBuilder.build();
|
||||
}
|
||||
|
||||
protected S3ClientConfiguration() {}
|
||||
|
||||
|
||||
protected final void setClientOverrideConfiguration(Properties props) {
|
||||
if( props == null )
|
||||
return;
|
||||
|
||||
if( props.containsKey("max_error_retry")) {
|
||||
log.trace("AWS client config - max_error_retry: {}", props.getProperty("max_error_retry"));
|
||||
cocBuilder().retryStrategy(StandardRetryStrategy.builder().maxAttempts((Integer.parseInt(props.getProperty("max_error_retry")) + 1 )).build());
|
||||
}
|
||||
|
||||
if( props.containsKey("protocol")) {
|
||||
log.warn("AWS client config 'protocol' doesn't exist in AWS SDK V2");
|
||||
}
|
||||
|
||||
if ( props.containsKey("signer_override")) {
|
||||
log.warn("AWS client config 'signerOverride' is not supported in AWS SDK V2. This option will be ignored.");
|
||||
|
||||
}
|
||||
|
||||
if( props.containsKey("socket_send_buffer_size_hints") || props.containsKey("socket_recv_buffer_size_hints") ) {
|
||||
log.warn("AWS client config - 'socket_send_buffer_size_hints' and 'socket_recv_buffer_size_hints' do not exist in AWS SDK V2" );
|
||||
}
|
||||
|
||||
if( props.containsKey("user_agent")) {
|
||||
log.warn("AWS client config 'user_agent' is not supported in AWS SDK V2. This option will be ignored.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public class S3MultipartOptions {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(S3MultipartOptions.class);
|
||||
|
||||
public static final int DEFAULT_CHUNK_SIZE = 100 << 20; // 100 MiB
|
||||
|
||||
public static final int DEFAULT_BUFFER_SIZE = 10485760;
|
||||
|
||||
/*
|
||||
* S3 Max copy size
|
||||
* https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html
|
||||
*/
|
||||
public static final long DEFAULT_MAX_COPY_SIZE = 5_000_000_000L;
|
||||
|
||||
/**
|
||||
* Upload chunk max size
|
||||
*/
|
||||
private int chunkSize;
|
||||
|
||||
/**
|
||||
* Maximum number of threads allowed
|
||||
*/
|
||||
private int maxThreads;
|
||||
|
||||
/**
|
||||
* Buffer size used by the stream uploader
|
||||
*/
|
||||
private int bufferSize;
|
||||
|
||||
/**
|
||||
* Copy object max size
|
||||
*/
|
||||
private long maxCopySize;
|
||||
|
||||
/**
|
||||
* Maximum number of attempts to upload a chunk in a multiparts upload process
|
||||
*/
|
||||
private int maxAttempts;
|
||||
|
||||
/**
|
||||
* Time (milliseconds) to wait after a failed upload to retry a chunk upload
|
||||
*/
|
||||
private long retrySleep;
|
||||
|
||||
|
||||
/*
|
||||
* initialize default values
|
||||
*/
|
||||
{
|
||||
retrySleep = 500;
|
||||
chunkSize = DEFAULT_CHUNK_SIZE;
|
||||
maxAttempts = 5;
|
||||
maxThreads = Runtime.getRuntime().availableProcessors() *3;
|
||||
bufferSize = DEFAULT_BUFFER_SIZE;
|
||||
maxCopySize = DEFAULT_MAX_COPY_SIZE;
|
||||
}
|
||||
|
||||
public S3MultipartOptions() {
|
||||
|
||||
}
|
||||
|
||||
public S3MultipartOptions(Properties props) {
|
||||
setMaxThreads(props.getProperty("upload_max_threads"));
|
||||
setChunkSize(props.getProperty("upload_chunk_size"));
|
||||
setMaxAttempts(props.getProperty("upload_max_attempts"));
|
||||
setRetrySleep(props.getProperty("upload_retry_sleep"));
|
||||
setBufferSize(props.getProperty("upload_buffer_size"));
|
||||
setMaxCopySize(props.getProperty("max_copy_size"));
|
||||
}
|
||||
|
||||
public int getChunkSize() {
|
||||
return chunkSize;
|
||||
}
|
||||
|
||||
public int getMaxThreads() {
|
||||
return maxThreads;
|
||||
}
|
||||
|
||||
public int getMaxAttempts() {
|
||||
return maxAttempts;
|
||||
}
|
||||
|
||||
public long getRetrySleep() {
|
||||
return retrySleep;
|
||||
}
|
||||
|
||||
public int getBufferSize() { return bufferSize; }
|
||||
|
||||
public long getMaxCopySize() { return maxCopySize; }
|
||||
|
||||
public S3MultipartOptions setChunkSize(int chunkSize) {
|
||||
this.chunkSize = chunkSize;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setChunkSize(String chunkSize) {
|
||||
if( chunkSize==null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
setChunkSize(Integer.parseInt(chunkSize));
|
||||
}
|
||||
catch( NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 multipart upload chunk size: `{}` -- Using default", chunkSize);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setBufferSize(int bufferSize) {
|
||||
this.bufferSize = bufferSize;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setBufferSize(String bufferSize) {
|
||||
if( bufferSize==null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
setBufferSize(Integer.parseInt(bufferSize));
|
||||
}
|
||||
catch( NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 multipart upload buffer size: `{}` -- Using default", bufferSize);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setMaxCopySize(String value) {
|
||||
if( value==null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
maxCopySize = Long.parseLong(value);
|
||||
}
|
||||
catch( NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 copy max size: `{}` -- Using default", maxCopySize);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setMaxThreads(int maxThreads) {
|
||||
this.maxThreads = maxThreads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setMaxThreads(String maxThreads) {
|
||||
if( maxThreads==null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
setMaxThreads(Integer.parseInt(maxThreads));
|
||||
}
|
||||
catch( NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 multipart upload max threads: `{}` -- Using default", maxThreads);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setMaxAttempts(int maxAttempts) {
|
||||
this.maxAttempts = maxAttempts;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setMaxAttempts(String maxAttempts) {
|
||||
if( maxAttempts == null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
this.maxAttempts = Integer.parseInt(maxAttempts);
|
||||
}
|
||||
catch(NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 multipart upload max attempts value: `{}` -- Using default", maxAttempts);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setRetrySleep( long retrySleep ) {
|
||||
this.retrySleep = retrySleep;
|
||||
return this;
|
||||
}
|
||||
|
||||
public S3MultipartOptions setRetrySleep( String retrySleep ) {
|
||||
if( retrySleep == null )
|
||||
return this;
|
||||
|
||||
try {
|
||||
this.retrySleep = Long.parseLong(retrySleep);
|
||||
}
|
||||
catch (NumberFormatException e ) {
|
||||
log.warn("Not a valid AWS S3 multipart upload retry sleep value: `{}` -- Using default", retrySleep);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public long getRetrySleepWithAttempt( int attempt ) {
|
||||
return retrySleep * ( 1 << (attempt-1) );
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "chunkSize=" + chunkSize +
|
||||
"; maxThreads=" + maxThreads +
|
||||
"; maxAttempts=" + maxAttempts +
|
||||
"; retrySleep=" + retrySleep;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
/**
 * Immutable holder of the coordinates of an S3 object (bucket, key and optional
 * version id), written to mimic the old V1 {@code S3ObjectId} class.
 */
public class S3ObjectId {

    private final String bucket;
    private final String key;
    private final String versionId;

    /**
     * Identify an object without referring to a specific version: the version id is {@code null}.
     */
    public S3ObjectId(String bucket, String key) {
        this(bucket, key, null);
    }

    /**
     * @param bucket the bucket name
     * @param key the object key
     * @param versionId the object version id, may be {@code null}
     */
    public S3ObjectId(String bucket, String key, String versionId) {
        this.bucket = bucket;
        this.key = key;
        this.versionId = versionId;
    }

    /** @return the bucket name */
    public String bucket() { return this.bucket; }

    /** @return the object key */
    public String key() { return this.key; }

    /** @return the version id, or {@code null} when none was given */
    public String versionId() { return this.versionId; }
}
|
||||
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.util.List;
|
||||
|
||||
import nextflow.cloud.aws.nio.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.*;
|
||||
import nextflow.cloud.aws.nio.S3Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class S3ObjectSummaryLookup {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(S3Object.class);
|
||||
|
||||
/**
|
||||
* Get the {@link software.amazon.awssdk.services.s3.model.S3Object} that represent this Path or its first child if the path does not exist
|
||||
* @param s3Path {@link S3Path}
|
||||
* @return {@link software.amazon.awssdk.services.s3.model.S3Object}
|
||||
* @throws java.io.IOException if path not found, access denied or error getting the object
|
||||
*/
|
||||
public S3Object lookup(S3Path s3Path) throws IOException {
|
||||
|
||||
/*
|
||||
* check is object summary has been cached
|
||||
*/
|
||||
S3Object summary = s3Path.fetchObject();
|
||||
if( summary != null ) {
|
||||
return summary;
|
||||
}
|
||||
|
||||
final S3Client client = s3Path.getFileSystem().getClient();
|
||||
|
||||
/*
|
||||
* when `key` is an empty string retrieve the object meta-data of the bucket
|
||||
*/
|
||||
if( "".equals(s3Path.getKey()) ) {
|
||||
HeadBucketResponse meta = client.getBucketMetadata(s3Path.getBucket());
|
||||
if( meta == null )
|
||||
throw new NoSuchFileException("s3://" + s3Path.getBucket());
|
||||
|
||||
summary = S3Object.builder()
|
||||
.key(s3Path.getKey())
|
||||
.build();
|
||||
|
||||
// TODO summary.setOwner(?);
|
||||
// TODO summary.setStorageClass(?);
|
||||
return summary;
|
||||
}
|
||||
|
||||
S3Object item = getS3Object(s3Path, client);
|
||||
if( item != null )
|
||||
return item;
|
||||
|
||||
throw new NoSuchFileException("s3://" + s3Path.getBucket() + "/" + s3Path.getKey());
|
||||
}
|
||||
|
||||
/**
|
||||
* Lookup for the S3 object matching the specified path using at most two bounded
|
||||
* {@code listObjects} calls (replaces the previous unbounded pagination loop).
|
||||
*
|
||||
* @param s3Path the S3 path to look up
|
||||
* @param client the S3 client
|
||||
* @return the matching {@link S3Object}, or {@code null} if not found
|
||||
*/
|
||||
private S3Object getS3Object(S3Path s3Path, S3Client client) throws IOException {
|
||||
|
||||
// Call 1: list up to 2 objects whose key starts with the target key.
|
||||
//
|
||||
// Why maxKeys(2) instead of paginating all results?
|
||||
// The previous implementation used an unbounded while(true) loop fetching 250 keys
|
||||
// per page. On prefixes with millions of objects this caused excessive S3 LIST API
|
||||
// calls, high latency, and potential timeouts. Two results are enough to cover
|
||||
// the common cases:
|
||||
// - Exact file match: the key itself exists as an object (e.g. "data.txt")
|
||||
// - Directory match: a child object (e.g. "data/file1") appears within the
|
||||
// first 2 lexicographic results
|
||||
ListObjectsRequest request = ListObjectsRequest.builder()
|
||||
.bucket(s3Path.getBucket())
|
||||
.prefix(s3Path.getKey())
|
||||
.maxKeys(2)
|
||||
.build();
|
||||
|
||||
ListObjectsResponse listing = client.listObjects(request);
|
||||
List<S3Object> results = listing.contents();
|
||||
|
||||
for( S3Object item : results ) {
|
||||
if( matchName(s3Path.getKey(), item)) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
|
||||
// Call 2 (fallback): list 1 object with prefix "key/" to detect directories
|
||||
// that Call 1 missed.
|
||||
//
|
||||
// Why can Call 1 miss a directory?
|
||||
// S3 lists keys in lexicographic (UTF-8 byte) order, and several common characters
|
||||
// sort *before* '/' (0x2F) — notably '-' (0x2D) and '.' (0x2E).
|
||||
//
|
||||
// Example: given keys "a-a/file-3", "a.txt", and "a/file-1", S3 returns them as:
|
||||
// a-a/file-3 ← '-' (0x2D) < '/' (0x2F)
|
||||
// a.txt ← '.' (0x2E) < '/' (0x2F)
|
||||
// a/file-1 ← '/' (0x2F) — the actual directory child
|
||||
//
|
||||
// With maxKeys(2), Call 1 only sees "a-a/file-3" and "a.txt" — neither matches
|
||||
// key "a" via matchName(). The directory child "a/file-1" is pushed beyond the
|
||||
// result window by sibling keys that sort earlier.
|
||||
//
|
||||
// By searching with prefix "a/" directly, we skip all those siblings and find
|
||||
// "a/file-1", confirming that "a" is a directory.
|
||||
request = ListObjectsRequest.builder()
|
||||
.bucket(s3Path.getBucket())
|
||||
.prefix(s3Path.getKey()+'/')
|
||||
.maxKeys(1)
|
||||
.build();
|
||||
|
||||
listing = client.listObjects(request);
|
||||
results = listing.contents();
|
||||
for( S3Object item : results ) {
|
||||
if( matchName(s3Path.getKey(), item)) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean matchName(String fileName, S3Object summary) {
|
||||
String foundKey = summary.key();
|
||||
|
||||
// they are different names return false
|
||||
if( !foundKey.startsWith(fileName) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// when they are the same length, they are identical
|
||||
if( foundKey.length() == fileName.length() )
|
||||
return true;
|
||||
|
||||
return foundKey.charAt(fileName.length()) == '/';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.nio.util;
|
||||
|
||||
import software.amazon.awssdk.http.SdkHttpClient;
|
||||
import software.amazon.awssdk.http.apache.ApacheHttpClient;
|
||||
import software.amazon.awssdk.http.apache.ProxyConfiguration;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Duration;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Class to convert Amazon properties in S3 synchronous client configuration
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
public class S3SyncClientConfiguration extends S3ClientConfiguration{
|
||||
|
||||
// Sync client should always have a connection limit
|
||||
private int maxConnections = 50;
|
||||
|
||||
private ApacheHttpClient.Builder httpClientBuilder;
|
||||
|
||||
private ApacheHttpClient.Builder httpClientBuilder(){
|
||||
if( this.httpClientBuilder == null)
|
||||
this.httpClientBuilder = ApacheHttpClient.builder();
|
||||
return this.httpClientBuilder;
|
||||
}
|
||||
|
||||
public int getMaxConnections() {
|
||||
return maxConnections;
|
||||
}
|
||||
|
||||
public SdkHttpClient.Builder getHttpClientBuilder(){
|
||||
if ( this.httpClientBuilder == null )
|
||||
return null;
|
||||
return this.httpClientBuilder;
|
||||
}
|
||||
|
||||
private S3SyncClientConfiguration(){
|
||||
super();
|
||||
}
|
||||
|
||||
private void setClientHttpBuilder(Properties props) {
|
||||
if( props.containsKey("connection_timeout") ) {
|
||||
log.trace("AWS client config - connection_timeout: {}", props.getProperty("connection_timeout"));
|
||||
httpClientBuilder().connectionTimeout(Duration.ofMillis(Long.parseLong(props.getProperty("connection_timeout"))));
|
||||
}
|
||||
|
||||
if( props.containsKey("max_connections")) {
|
||||
log.trace("AWS client config - max_connections: {}", props.getProperty("max_connections"));
|
||||
this.maxConnections = Integer.parseInt(props.getProperty("max_connections"));
|
||||
httpClientBuilder().maxConnections(this.maxConnections);
|
||||
}
|
||||
|
||||
if( props.containsKey("socket_timeout")) {
|
||||
log.trace("AWS client config - socket_timeout: {}", props.getProperty("socket_timeout"));
|
||||
httpClientBuilder().socketTimeout(Duration.ofMillis(Long.parseLong(props.getProperty("socket_timeout"))));
|
||||
}
|
||||
|
||||
try {
|
||||
if( props.containsKey("proxy_host")) {
|
||||
final String host = props.getProperty("proxy_host");
|
||||
final int port = Integer.parseInt(props.getProperty("proxy_port", "-1"));
|
||||
final String scheme = props.getProperty("proxy_scheme", "http");
|
||||
final ProxyConfiguration.Builder proxyConfig = ProxyConfiguration.builder();
|
||||
log.trace("AWS client config - proxy {}://{}:{}", scheme, host, port);
|
||||
proxyConfig.endpoint(new URI(scheme, null, host, port, null, null, null));
|
||||
|
||||
if (props.containsKey("proxy_username")) {
|
||||
proxyConfig.username(props.getProperty("proxy_username"));
|
||||
}
|
||||
if (props.containsKey("proxy_password")) {
|
||||
proxyConfig.password(props.getProperty("proxy_password"));
|
||||
}
|
||||
|
||||
if (props.containsKey("proxy_domain")) {
|
||||
proxyConfig.ntlmDomain(props.getProperty("proxy_domain"));
|
||||
}
|
||||
if (props.containsKey("proxy_workstation")) {
|
||||
proxyConfig.ntlmWorkstation(props.getProperty("proxy_workstation"));
|
||||
}
|
||||
|
||||
httpClientBuilder().proxyConfiguration(proxyConfig.build());
|
||||
}
|
||||
} catch (URISyntaxException e){
|
||||
log.warn("Exception creating AWS client config - proxy URI", e);
|
||||
}
|
||||
}
|
||||
|
||||
public static S3SyncClientConfiguration create(Properties props) {
|
||||
S3SyncClientConfiguration config = new S3SyncClientConfiguration();
|
||||
|
||||
if( props != null ) {
|
||||
config.setClientOverrideConfiguration(props);
|
||||
config.setClientHttpBuilder(props);
|
||||
}
|
||||
|
||||
return config;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import com.google.common.base.CaseFormat
|
||||
|
||||
/**
 * Helper class for AWS
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsHelper {

    /**
     * Convert an ACL name to the matching {@link ObjectCannedACL} constant.
     * A value containing a hyphen is read as lower-hyphen notation, any other value
     * as upper-camel notation; both are converted to the upper-underscore constant name.
     *
     * @param value the ACL name
     * @return the matching {@link ObjectCannedACL}, or {@code null} when the value is null or empty
     */
    static ObjectCannedACL parseS3Acl(String value) {
        if( !value )
            return null

        final CaseFormat notation = value.contains('-')
                ? CaseFormat.LOWER_HYPHEN
                : CaseFormat.UPPER_CAMEL
        final String constantName = notation.to(CaseFormat.UPPER_UNDERSCORE, value)
        return ObjectCannedACL.valueOf(constantName)
    }

}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.util.regex.Pattern
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
/**
 * Parse and merge AWS config and credentials file.
 *
 * The content is collected section by section; when the same key shows up more than
 * once in a section, the first occurrence is retained.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
class ConfigParser {

    final private static Pattern KEY_VALUE = ~/\s*(\w+)\s*=.*/

    // section name -> lines of that section, in insertion order
    final Map<String, List<String>> content = new LinkedHashMap<>()

    /**
     * Parse the content of the given file and merge it into this parser
     */
    ConfigParser parseConfig(Path path) {
        return parseConfig(path.text)
    }

    /**
     * Parse the given text and merge it into this parser. Lines found before the
     * first section header and blank lines are discarded.
     *
     * @return this parser, to allow chaining
     */
    ConfigParser parseConfig(String text) {
        String activeSection = null
        for( String line : text.readLines() ) {
            final header = parseSection(line)
            if( header ) {
                activeSection = header
                continue
            }
            if( !activeSection || !line.trim() )
                continue

            final entries = content.computeIfAbsent(activeSection, (String it) -> new ArrayList<>())
            final key = findKey(line)
            // a line with no recognisable key is always retained
            final duplicated = key && entries.any { String existing -> findKey(existing)==key }
            if( !duplicated )
                entries.add(line)
        }

        return this
    }

    /**
     * Extract the section name from a {@code [name]} header line, dropping the
     * {@code profile } prefix when present.
     *
     * @return the section name, or {@code null} when the line is not a section header
     */
    protected String parseSection(String str) {
        final String trimmed = str.trim()
        final boolean bracketed = trimmed.startsWith('[') && trimmed.endsWith(']')
        if( !bracketed )
            return null

        final String name = trimmed.substring(1, trimmed.size()-1)
        return name.startsWith('profile ')
                ? name.substring('profile '.size())
                : name
    }

    /**
     * Render the collected sections back to text
     */
    String text() {
        final buffer = new StringBuilder()
        for( Map.Entry<String,List<String>> section : content.entrySet() ) {
            buffer.append('[').append(section.getKey()).append(']\n')
            for( String row : section.getValue() )
                buffer.append(row).append('\n')
        }
        return buffer.toString()
    }

    /**
     * @return the key of a {@code key = value} line, or {@code null} when the line
     *      does not have that form
     */
    protected String findKey(String line) {
        final matcher = KEY_VALUE.matcher(line)
        if( !matcher.matches() )
            return null
        return matcher.group(1)
    }

}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,214 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.batch.AwsOptions
|
||||
import nextflow.executor.BashFunLib
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
|
||||
/**
|
||||
* AWS S3 helper class
|
||||
*/
|
||||
@CompileStatic
|
||||
class S3BashLib extends BashFunLib<S3BashLib> {
|
||||
|
||||
private String storageClass = 'STANDARD'
|
||||
private String storageEncryption = ''
|
||||
private String storageKmsKeyId = ''
|
||||
private String debug = ''
|
||||
private String cli = 'aws'
|
||||
private String retryMode
|
||||
private String s5cmdPath
|
||||
private String acl = ''
|
||||
private String requesterPays = ''
|
||||
private String forceGlacierTransfer = ''
|
||||
|
||||
/**
 * Override the AWS CLI command used by the generated helper functions.
 *
 * @param cliPath The CLI command or path; a null or empty value keeps the current one
 * @return This object, to allow method chaining
 */
S3BashLib withCliPath(String cliPath) {
    if( !cliPath )
        return this
    cli = cliPath
    return this
}
|
||||
|
||||
/**
 * Set the value exported as {@code AWS_RETRY_MODE} by {@link #retryEnv()}.
 *
 * @param value The retry mode; a null or empty value keeps the current one
 * @return This object, to allow method chaining
 */
S3BashLib withRetryMode(String value) {
    if( !value )
        return this
    this.retryMode = value
    return this
}
|
||||
|
||||
/**
 * Enable or disable the {@code --debug} flag in the commands rendered by {@link #s3Lib()}.
 *
 * @param value When true the flag is added; null or false removes it
 * @return This object, to allow method chaining
 */
S3BashLib withDebug(Boolean value) {
    // the trailing blank separates the flag from the option rendered after it
    if( value )
        debug = '--debug '
    else
        debug = ''
    return this
}
|
||||
|
||||
/**
 * Set the value rendered after the {@code --storage-class} option of the upload commands.
 *
 * @param value The storage class name; a null or empty value keeps the current one
 * @return This object, to allow method chaining
 */
S3BashLib withStorageClass(String value) {
    if( !value )
        return this
    storageClass = value
    return this
}
|
||||
|
||||
/**
 * Set the server-side encryption option added to the upload commands,
 * rendered as {@code --sse <value> }.
 *
 * @param value The encryption setting; a null or empty value keeps the current one
 * @return This object, to allow method chaining
 */
S3BashLib withStorageEncryption(String value) {
    // the guard already rules out the empty case, hence no fallback branch is needed;
    // the trailing blank separates the option from the one rendered after it
    if( value )
        this.storageEncryption = "--sse $value "
    return this
}
|
||||
|
||||
/**
 * Set the KMS key option added to the upload commands,
 * rendered as {@code --sse-kms-key-id <value> }.
 *
 * @param value The KMS key identifier; a null or empty value keeps the current one
 * @return This object, to allow method chaining
 */
S3BashLib withStorageKmsKeyId(String value) {
    // the guard already rules out the empty case, hence no fallback branch is needed;
    // the trailing blank separates the option from the one rendered after it
    if( value )
        this.storageKmsKeyId = "--sse-kms-key-id $value "
    return this
}
|
||||
|
||||
/**
 * Set the s5cmd command. When a non-empty value is given {@link #render()}
 * emits the s5cmd based helpers in place of the AWS CLI ones.
 *
 * @param value The s5cmd command or path; the value is stored as given, null included
 * @return This object, to allow method chaining
 */
S3BashLib withS5cmdPath(String value) {
    s5cmdPath = value
    return this
}
|
||||
|
||||
/**
 * Set the canned ACL option added to the upload commands,
 * rendered as {@code --acl <value> }.
 *
 * @param value The canned ACL; a null value keeps the current setting
 * @return This object, to allow method chaining
 */
S3BashLib withAcl(ObjectCannedACL value) {
    if( !value )
        return this
    acl = "--acl $value "
    return this
}
|
||||
|
||||
/**
 * Enable or disable the {@code --request-payer requester} option in the upload commands.
 *
 * @param value When true the option is added; null or false removes it
 * @return This object, to allow method chaining
 */
S3BashLib withRequesterPays(Boolean value) {
    if( value )
        requesterPays = "--request-payer requester "
    else
        requesterPays = ''
    return this
}
|
||||
|
||||
/**
 * Enable or disable the {@code --force-glacier-transfer} flag, which
 * {@link #s3Lib()} adds to the recursive download command.
 *
 * @param value When true the flag is added; null or false removes it
 * @return This object, to allow method chaining
 */
S3BashLib withForceGlacierTransfer(Boolean value) {
    if( value )
        forceGlacierTransfer = '--force-glacier-transfer '
    else
        forceGlacierTransfer = ''
    return this
}
|
||||
|
||||
/**
 * Render the Bash snippet exporting the AWS CLI retry settings.
 *
 * @return An empty string when no retry mode has been set, otherwise the
 *      snippet exporting {@code AWS_RETRY_MODE} and {@code AWS_MAX_ATTEMPTS}
 */
// NOTE(review): `maxTransferAttempts` is not declared in this class - presumably inherited from BashFunLib, confirm
protected String retryEnv() {
|
||||
if( !retryMode )
|
||||
return ''
|
||||
// the template below is the implicit return value of the method
"""
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=${retryMode}
|
||||
export AWS_MAX_ATTEMPTS=${maxTransferAttempts}
|
||||
""".stripIndent().rightTrim()
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement S3 upload/download helper using `aws s3` CLI tool
|
||||
*
|
||||
* @return The Bash script implementing the S3 helper functions
|
||||
*/
|
||||
// Template notes:
// - `\$` keeps a literal `$` for Bash, while `$cli`, `$storageClass` and the `${...}`
//   placeholders are interpolated by Groovy when the template is evaluated
// - the option placeholders are concatenated without separators because each value,
//   as assigned by the `with*` setters, is either empty or ends with a blank
// - `nxf_s3_upload` reads from stdin when the file name is `-`
// - `${forceGlacierTransfer}` is only applied to the recursive download command
protected String s3Lib() {
|
||||
"""
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=\$1
|
||||
local s3path=\$2
|
||||
if [[ "\$name" == - ]]; then
|
||||
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass - "\$s3path"
|
||||
elif [[ -d "\$name" ]]; then
|
||||
$cli s3 cp --only-show-errors --recursive ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
|
||||
else
|
||||
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=\$1
|
||||
local target=\$2
|
||||
local file_name=\$(basename \$1)
|
||||
local is_dir=\$($cli s3 ls \$source | grep -F "PRE \${file_name}/" -c)
|
||||
if [[ \$is_dir == 1 ]]; then
|
||||
$cli s3 cp --only-show-errors --recursive ${forceGlacierTransfer}"\$source" "\$target"
|
||||
else
|
||||
$cli s3 cp --only-show-errors "\$source" "\$target"
|
||||
fi
|
||||
}
|
||||
""".stripIndent(true)
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement S3 upload/download helper using s5cmd CLI tool
|
||||
* https://github.com/peak/s5cmd
|
||||
*
|
||||
* @return The Bash script implementing the S3 helper functions
|
||||
*/
|
||||
// Template notes:
// - the local `cli` shadows the `cli` field, so `$cli` below expands to the s5cmd path
// - `\$` keeps a literal `$` for Bash, the other placeholders are interpolated by Groovy
// - the `debug` and `forceGlacierTransfer` settings are not used by this template
// - when uploading from stdin (`-`) the data is first copied into a temporary
//   directory and uploaded from there
// NOTE(review): `nxf_mktemp` is not defined in this class - presumably provided by the core Bash functions, confirm
protected String s5cmdLib() {
|
||||
final cli = s5cmdPath
|
||||
"""
|
||||
# aws helper for s5cmd
|
||||
nxf_s3_upload() {
|
||||
local name=\$1
|
||||
local s3path=\$2
|
||||
if [[ "\$name" == - ]]; then
|
||||
local tmp=\$(nxf_mktemp)
|
||||
cp /dev/stdin \$tmp/\$name
|
||||
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass \$tmp/\$name "\$s3path"
|
||||
elif [[ -d "\$name" ]]; then
|
||||
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name/" "\$s3path/\$name/"
|
||||
else
|
||||
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=\$1
|
||||
local target=\$2
|
||||
local file_name=\$(basename \$1)
|
||||
local is_dir=\$($cli ls \$source | grep -F "DIR \${file_name}/" -c)
|
||||
if [[ \$is_dir == 1 ]]; then
|
||||
$cli cp "\$source/*" "\$target"
|
||||
else
|
||||
$cli cp "\$source" "\$target"
|
||||
fi
|
||||
}
|
||||
""".stripIndent()
|
||||
}
|
||||
|
||||
/**
 * Render the complete Bash library: the output of the parent class followed
 * either by the s5cmd helpers, when a s5cmd path has been set, or by the
 * AWS CLI retry settings and the AWS CLI helpers.
 *
 * @return The Bash script text
 */
@Override
String render() {
    final String core = super.render()
    if( s5cmdPath )
        return core + s5cmdLib()
    return core + retryEnv() + s3Lib()
}
|
||||
|
||||
/**
 * Create a {@link S3BashLib} configured with the settings held by the given options.
 *
 * @param opts The AWS options providing the transfer, CLI and storage settings
 * @param includeCore The value passed to {@code includeCoreFun}
 * @return The configured library object
 */
static private S3BashLib lib0(AwsOptions opts, boolean includeCore) {
    // generic transfer settings
    final S3BashLib transfer = new S3BashLib()
            .includeCoreFun(includeCore)
            .withMaxParallelTransfers( opts.maxParallelTransfers )
            .withDelayBetweenAttempts( opts.delayBetweenAttempts )
            .withMaxTransferAttempts( opts.maxTransferAttempts )
    // S3 specific settings
    return transfer
            .withCliPath( opts.awsCli )
            .withStorageClass( opts.storageClass )
            .withStorageEncryption( opts.storageEncryption )
            .withStorageKmsKeyId( opts.storageKmsKeyId )
            .withRetryMode( opts.retryMode )
            .withDebug( opts.debug )
            .withS5cmdPath( opts.s5cmdPath )
            .withAcl( opts.s3Acl )
            .withRequesterPays( opts.requesterPays )
            .withForceGlacierTransfer( opts.forceGlacierTransfer )
}
|
||||
|
||||
/**
 * Render the S3 Bash library for the given options, core functions included.
 *
 * @param opts The AWS options used to configure the library
 * @return The Bash script text
 */
static String script(AwsOptions opts) {
    final S3BashLib lib = lib0(opts, true)
    return lib.render()
}
|
||||
|
||||
/**
 * Render the S3 Bash library using the options of the current global
 * session, without the core functions.
 *
 * @return The Bash script text
 */
static String script() {
    final session = Global.session as Session
    return lib0(new AwsOptions(session), false).render()
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentials
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
|
||||
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
/**
 * AWS credentials provider that delegates the credentials resolution to the
 * specified provider and falls back to the {@link AnonymousCredentialsProvider}
 * when no credentials are available.
 *
 * Once the fallback has been taken the anonymous credentials are cached and
 * returned by every following invocation, without consulting the target again.
 *
 * See also {@link software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain}
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@CompileStatic
class S3CredentialsProvider implements AwsCredentialsProvider {

    /** The provider the credentials resolution is delegated to */
    private final AwsCredentialsProvider target

    /** The anonymous credentials, set only after the target failed to resolve */
    private volatile AwsCredentials anonymous

    /**
     * @param target The provider the credentials resolution is delegated to
     */
    S3CredentialsProvider(AwsCredentialsProvider target) {
        this.target = target
    }

    /**
     * Resolve the credentials via the target provider, falling back to
     * anonymous credentials when the target throws an exception.
     *
     * @return The credentials given by the target, or the anonymous ones
     */
    @Override
    AwsCredentials resolveCredentials() {
        if (anonymous != null) {
            return anonymous
        }
        try {
            return target.resolveCredentials()
        } catch (Exception e) {
            // report the cause, otherwise a misconfiguration is indistinguishable from missing credentials
            log.debug("No AWS credentials available - falling back to anonymous access", e)
        }
        anonymous = AnonymousCredentialsProvider.create().resolveCredentials()
        return anonymous
    }

}
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.Global
|
||||
import nextflow.cloud.aws.batch.AwsBatchFileCopyStrategy
|
||||
import nextflow.file.FileHelper
|
||||
import nextflow.file.FileSystemPathFactory
|
||||
/**
 * Implements a factory strategy to parse and build S3 path URIs
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@CompileStatic
class S3PathFactory extends FileSystemPathFactory {

    /**
     * Parse a {@code s3://bucket/key} string into the corresponding path.
     *
     * @param str The string to parse
     * @return The {@link S3Path} for a string starting with {@code s3://}
     *      followed by a character other than {@code /}, {@code null} otherwise
     */
    @Override
    protected Path parseUri(String str) {
        // normalise 's3' path
        // note: the length check prevents an index out of bounds error when the string is just `s3://`
        if( str.startsWith('s3://') && str.length()>5 && str[5]!='/' ) {
            final path = "s3:///${str.substring(5)}"
            return create(path)
        }
        return null
    }

    /**
     * @return The {@code aws} scope of the global configuration, or an empty map when missing
     */
    static private Map config() {
        final result = Global.config?.get('aws') as Map
        return result != null ? result : Collections.emptyMap()
    }

    /**
     * @param path The path to convert
     * @return The path prefixed by {@code s3:/} when it is a {@link S3Path}, {@code null} otherwise
     */
    @Override
    protected String toUriString(Path path) {
        return path instanceof S3Path ? "s3:/$path".toString() : null
    }

    /**
     * @param target The target path; not used by this implementation
     * @return The script rendered by {@link S3BashLib#script()}
     */
    @Override
    protected String getBashLib(Path target) {
        return S3BashLib.script()
    }

    /**
     * @param source The name of the file to upload
     * @param target The path where the file is uploaded
     * @return The upload command when the target is a {@link S3Path}, {@code null} otherwise
     */
    @Override
    protected String getUploadCmd(String source, Path target) {
        return target instanceof S3Path
                ? AwsBatchFileCopyStrategy.uploadCmd(source,target)
                : null
    }

    /**
     * Creates a {@link S3Path} from a S3 formatted URI.
     *
     * @param path
     *      A S3 URI path e.g. s3:///BUCKET_NAME/some/data.
     *      NOTE it expects the s3 prefix provided with triple `/` .
     *      This is required by the underlying implementation expecting the host name in the URI to be empty
     *      and the bucket name to be the first path element
     * @return
     *      The corresponding {@link S3Path}
     * @throws IllegalArgumentException
     *      When the path is null, empty or does not start with the `s3:///` prefix
     */
    static S3Path create(String path) {
        if( !path ) throw new IllegalArgumentException("Missing S3 path argument")
        if( !path.startsWith('s3:///') ) throw new IllegalArgumentException("S3 path must start with s3:/// prefix -- offending value '$path'")
        // note: this URI constructor parses the path parameter and extracts the `scheme` and `authority` components
        final uri = new URI(null,null, path,null,null)
        return (S3Path)FileHelper.getOrCreateFileSystemFor(uri,config()).provider().getPath(uri)
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
|
||||
import com.esotericsoftware.kryo.Kryo
|
||||
import com.esotericsoftware.kryo.Serializer
|
||||
import com.esotericsoftware.kryo.io.Input
|
||||
import com.esotericsoftware.kryo.io.Output
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.util.SerializerRegistrant
|
||||
import org.pf4j.Extension
|
||||
/**
 * Kryo serializer for {@link S3Path} objects, which also registers
 * itself as the serializer of that type.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@Extension
@CompileStatic
class S3PathSerializer extends Serializer<S3Path> implements SerializerRegistrant {

    /**
     * Add this class to the given map as the serializer of {@link S3Path}.
     *
     * @param serializers The map associating a type with its serializer
     */
    @Override
    void register(Map<Class, Object> serializers) {
        serializers.put(S3Path, S3PathSerializer)
    }

    /**
     * Write the provider scheme followed by the string form of the path.
     *
     * @param kryo The Kryo instance; not used
     * @param output The stream where the data is written
     * @param target The path to serialize
     */
    @Override
    void write(Kryo kryo, Output output, S3Path target) {
        final String scheme = target.getFileSystem().provider().getScheme()
        final String path = target.toString()
        log.trace "S3Path serialization > scheme: $scheme; path: $path"
        for( String field : [scheme, path] )
            output.writeString(field)
    }

    /**
     * Read back the scheme and the path strings and re-create the {@link S3Path}.
     *
     * @param kryo The Kryo instance; not used
     * @param input The stream from where the data is read
     * @param type The expected type; not used
     * @return The de-serialized path
     * @throws IllegalStateException When the scheme read is not {@code s3}
     */
    @Override
    S3Path read(Kryo kryo, Input input, Class<S3Path> type) {
        // both fields are consumed before the validation takes place
        final String scheme = input.readString()
        final String path = input.readString()
        if( scheme != 's3' )
            throw new IllegalStateException("Unexpected scheme for S3 path -- offending value '$scheme'")

        log.trace "S3Path de-serialization > scheme: $scheme; path: $path"
        final S3Path result = S3PathFactory.create("s3://${path}")
        return result
    }

}
|
||||
@@ -0,0 +1,42 @@
|
||||
#
|
||||
# Copyright 2013-2026, Seqera Labs
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
#
|
||||
# The MIT License (MIT)
|
||||
#
|
||||
# Copyright (c) 2014 Javier Arnaiz @arnaix
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
|
||||
# if not present, FileSystems.newFileSystem throws ProviderNotFoundException
|
||||
nextflow.cloud.aws.nio.S3FileSystemProvider
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow
|
||||
|
||||
import spock.lang.Specification
|
||||
|
||||
import static test.ScriptHelper.runDataflow
|
||||
/**
 * Checks that a channel can be created from a S3 path
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3ChannelTest extends Specification {

    // the feature only verifies that no error is raised, the channel content is not inspected
    def testFromPathS3() {
        when:
        runDataflow { Channel.fromPath('s3://bucket/some/data.txt') }

        then:
        noExceptionThrown()
    }

}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
import spock.lang.IgnoreIf
|
||||
import spock.lang.Requires
|
||||
import spock.lang.Specification
|
||||
/**
 * Checks the resolution of S3 paths made by the {@link Nextflow} helper methods
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3NextflowTest extends Specification {

    def 'should return s3 uris'() {
        expect:
        Nextflow.file('s3://foo/data/file.log') == Paths.get(new URI('s3:///foo/data/file.log'))
    }

    def 'should resolve rel paths against env base' () {
        given:
        SysEnv.push(NXF_FILE_ROOT: 's3://some/base/dir')

        expect: 'an absolute path is not affected by the base dir'
        Nextflow.file( 's3://abs/path/file.txt' ) == Paths.get(new URI('s3:///abs/path/file.txt'))

        and: 'a relative path is resolved against the base dir'
        Nextflow.file( 'file.txt' ) == Paths.get(new URI('s3:///some/base/dir/file.txt'))

        cleanup:
        SysEnv.pop()
    }

    // skipped by smoke runs and when the S3 credentials are not available in the environment
    @IgnoreIf({System.getenv('NXF_SMOKE')})
    @Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
    def 'should resolve list of files' () {
        when:
        def files = Nextflow.files('s3://ngi-igenomes/*')

        then:
        files.size() == 3
    }

    // skipped by smoke runs and when the S3 credentials are not available in the environment
    @IgnoreIf({System.getenv('NXF_SMOKE')})
    @Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
    def 'should check s3 bucket exists files' () {
        when:
        def bucket = Nextflow.file('s3://ngi-igenomes/')

        then:
        bucket.exists() == true
        bucket.isDirectory() == true
    }

}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import nextflow.file.FileHelper
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
 * Checks the cloud cache path resolution made by {@link Session} with S3 paths
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3SessionTest extends Specification {

    @Unroll
    def 'should get cloud cache path' () {
        given:
        def spy = Spy(Session)

        expect:
        spy.cloudCachePath(CONFIG, FileHelper.asPath(WORKDIR)) == EXPECTED

        where:
        CONFIG                                  | WORKDIR           | EXPECTED
        null                                    | '/foo'            | null
        [enabled:true]                          | 's3://foo/work'   | FileHelper.asPath('s3://foo/work')
        [enabled:true, path:'s3://this/that']   | '/foo'            | FileHelper.asPath('s3://this/that')
    }

    def 'should error with non-cloud bucket' () {
        given:
        def spy = Spy(Session)

        when: 'the cache is enabled using a local work dir'
        spy.cloudCachePath([enabled:true], Path.of('/foo/dir'))

        then:
        def error = thrown(IllegalArgumentException)
        error.message == "Storage path not supported by Cloud-cache - offending value: '/foo/dir'"
    }

}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws
|
||||
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import spock.lang.Specification
|
||||
/**
 * Checks the settings held by {@link AwsClientFactory} when created
 * from the configuration and from the environment
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsClientFactoryTest extends Specification {

    def 'should create factory' () {
        given: 'an empty environment'
        SysEnv.push([:])

        when:
        def config = new AwsConfig(accessKey: 'foo', secretKey: 'bar', region:'xyz', profile:'my-profile')
        def factory = new AwsClientFactory(config)

        then:
        factory.accessKey() == 'foo'
        factory.secretKey() == 'bar'
        factory.region() == 'xyz'
        factory.profile() == 'my-profile'

        cleanup:
        SysEnv.pop()
    }

    def 'should create factory using environment' () {
        given: 'region and profile defined via the environment'
        SysEnv.push([AWS_REGION:'eu-foo-1', AWS_PROFILE: 'profile-x'])

        when:
        def config = new AwsConfig([:])
        def factory = new AwsClientFactory(config)

        then:
        factory.accessKey() == null
        factory.secretKey() == null
        factory.region() == 'eu-foo-1'
        factory.profile() == 'profile-x'

        cleanup:
        SysEnv.pop()
    }

}
|
||||
@@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
import java.nio.file.Paths
|
||||
|
||||
import nextflow.processor.TaskBean
|
||||
import spock.lang.Specification
|
||||
import test.TestHelper
|
||||
|
||||
class AwsBatchFileCopyStrategyTest extends Specification {
|
||||
|
||||
def 'should strip out file/folder name from target S3 path' () {
|
||||
given:
|
||||
def OUTPUTS = ["outputs_*","final_folder"]
|
||||
def TARGET = Paths.get('/data/results')
|
||||
def FILE = Paths.get('/some/data/nobel_prize_results.gz')
|
||||
def EXIT = Paths.get('/some/path/.exitcode')
|
||||
def RUN = Paths.get('/some/data/.command.run')
|
||||
def copy = new AwsBatchFileCopyStrategy(Mock(TaskBean), new AwsOptions())
|
||||
expect:
|
||||
copy.touchFile(RUN) == "echo start | nxf_s3_upload - s3://some/data/.command.run"
|
||||
copy.copyFile("nobel_prize_results.gz",Paths.get("/some/data/nobel_prize_results.gz")) == "nxf_s3_upload nobel_prize_results.gz s3://some/data"
|
||||
copy.exitFile(EXIT) == "| nxf_s3_upload - s3://some/path/.exitcode || true"
|
||||
copy.stageInputFile(FILE, 'foo.txt') == """
|
||||
downloads+=("nxf_s3_download s3://some/data/nobel_prize_results.gz foo.txt")
|
||||
"""
|
||||
.stripIndent().trim()
|
||||
copy.getUnstageOutputFilesScript(OUTPUTS,TARGET) == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d outputs_* final_folder" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://data/results")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''
|
||||
.stripIndent().leftTrim()
|
||||
}
|
||||
|
||||
def 'should return unstage script' () {
|
||||
given:
|
||||
def copy = new AwsBatchFileCopyStrategy(Mock(TaskBean), new AwsOptions())
|
||||
def target = Paths.get('/foo/bar')
|
||||
|
||||
when:
|
||||
def script = copy.getUnstageOutputFilesScript(['file.txt'],target)
|
||||
then:
|
||||
script.trim() == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d file.txt" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''
|
||||
.stripIndent().trim()
|
||||
|
||||
when:
|
||||
script = copy.getUnstageOutputFilesScript(['file-*.txt'],target)
|
||||
then:
|
||||
script.trim() == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d file-*.txt" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''
|
||||
.stripIndent().trim()
|
||||
|
||||
when:
|
||||
script = copy.getUnstageOutputFilesScript(['file-[a,b].txt'],target)
|
||||
then:
|
||||
script.trim() == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d file-[a,b].txt" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''
|
||||
.stripIndent().trim()
|
||||
|
||||
when:
|
||||
script = copy.getUnstageOutputFilesScript(['file-01(A).txt', 'f o o.txt'],target)
|
||||
then:
|
||||
script.trim() == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d file-01\\(A\\).txt f\\ o\\ o.txt" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''
|
||||
.stripIndent().trim()
|
||||
}
|
||||
|
||||
def 'should check the beforeScript' () {
|
||||
|
||||
given:
|
||||
def bean = Mock(TaskBean)
|
||||
def opts = Mock(AwsOptions)
|
||||
AwsBatchFileCopyStrategy copy = Spy(AwsBatchFileCopyStrategy, constructorArgs: [bean, opts])
|
||||
|
||||
when:
|
||||
def script = copy.getBeforeStartScript()
|
||||
then:
|
||||
1 * opts.getAwsCli() >> 'aws'
|
||||
1 * opts.getStorageClass() >> null
|
||||
1 * opts.getStorageEncryption() >> null
|
||||
|
||||
script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
|
||||
when:
|
||||
script = copy.getBeforeStartScript()
|
||||
then:
|
||||
1 * opts.getAwsCli() >> '/foo/aws'
|
||||
1 * opts.getStorageClass() >> 'STANDARD_IA'
|
||||
1 * opts.getStorageEncryption() >> 'AES256'
|
||||
|
||||
script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
/foo/aws s3 cp --only-show-errors --recursive --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
|
||||
else
|
||||
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(/foo/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
/foo/aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
/foo/aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Checks the script returned by getEnvScript(ENV, false) under four successive
// stubbings of the mocked AwsOptions (remote bin dir, CLI command, region).
def 'should return env variables' () {
|
||||
|
||||
given:
|
||||
def ENV = [FOO: 'hola', BAR:'world', PATH:'xxx']
|
||||
def bean = Mock(TaskBean)
|
||||
def opts = Mock(AwsOptions)
|
||||
AwsBatchFileCopyStrategy copy = Spy(AwsBatchFileCopyStrategy, constructorArgs: [bean, opts])
|
||||
|
||||
// case 1: no remote bin dir -- only the FOO and BAR exports are expected
when:
|
||||
def script = copy.getEnvScript(ENV,false)
|
||||
then:
|
||||
// note: PATH is always removed
|
||||
opts.getRemoteBinDir() >> null
|
||||
// NOTE(review): this case stubs getCliPath() while the later cases stub getAwsCli() -- confirm this is intended
opts.getCliPath() >> null
|
||||
script == '''
|
||||
export FOO="hola"
|
||||
export BAR="world"
|
||||
'''.stripIndent().leftTrim()
|
||||
|
||||
// case 2: remote bin dir set and CLI stubbed as plain 'aws' -- copy, chmod and PATH lines precede the exports
when:
|
||||
script = copy.getEnvScript(ENV,false)
|
||||
then:
|
||||
opts.getRemoteBinDir() >> '/foo/bar'
|
||||
opts.getAwsCli() >> 'aws'
|
||||
script == '''
|
||||
aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
|
||||
chmod +x $PWD/nextflow-bin/* || true
|
||||
export PATH=$PWD/nextflow-bin:$PATH
|
||||
export FOO="hola"
|
||||
export BAR="world"
|
||||
'''.stripIndent().leftTrim()
|
||||
|
||||
// case 3: same as case 2 but with the CLI stubbed as an absolute path
when:
|
||||
script = copy.getEnvScript(ENV,false)
|
||||
then:
|
||||
opts.getAwsCli() >> '/conda/bin/aws'
|
||||
opts.getRemoteBinDir() >> '/foo/bar'
|
||||
script == '''
|
||||
/conda/bin/aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
|
||||
chmod +x $PWD/nextflow-bin/* || true
|
||||
export PATH=$PWD/nextflow-bin:$PATH
|
||||
export FOO="hola"
|
||||
export BAR="world"
|
||||
'''.stripIndent().leftTrim()
|
||||
|
||||
// case 4: region is stubbed as well; the expected script is identical to case 3
when:
|
||||
script = copy.getEnvScript(ENV,false)
|
||||
then:
|
||||
opts.getAwsCli() >> '/conda/bin/aws'
|
||||
opts.getRemoteBinDir() >> '/foo/bar'
|
||||
opts.getRegion() >> 'eu-west-1'
|
||||
script == '''
|
||||
/conda/bin/aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
|
||||
chmod +x $PWD/nextflow-bin/* || true
|
||||
export PATH=$PWD/nextflow-bin:$PATH
|
||||
export FOO="hola"
|
||||
export BAR="world"
|
||||
'''.stripIndent().leftTrim()
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Staging an input file must yield a single `downloads+=` entry that calls
 * `nxf_s3_download` with the source path and the requested stage name.
 */
def 'should return stage input input file'() {
    given:
    def taskBean = Mock(TaskBean)
    def awsOptions = Mock(AwsOptions)
    def strategy = new AwsBatchFileCopyStrategy(taskBean, awsOptions)
    and:
    def file = TestHelper.createInMemTempFile('foo.txt')

    expect:
    strategy.stageInputFile(file, 'bar.txt') == "downloads+=(\"nxf_s3_download s3:/$file bar.txt\")" as String
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import nextflow.cloud.types.PriceModel
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import software.amazon.awssdk.services.ec2.model.Instance
|
||||
import software.amazon.awssdk.services.ec2.model.InstanceType
|
||||
import software.amazon.awssdk.services.ec2.model.InstanceLifecycleType
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
* Tests for AwsBatchHelper
|
||||
*
|
||||
* @author Rob Syme <rob.syme@seqera.io>
|
||||
*/
|
||||
class AwsBatchHelperTest extends Specification {

    /**
     * Only the SPOT lifecycle maps to the spot price model; every other
     * lifecycle in the table, including a missing one, maps to standard.
     */
    @Unroll
    def 'should detect spot instance pricing model'() {
        given:
        def batchHelper = new AwsBatchHelper(Mock(BatchClient), null)
        def ec2Instance = Instance.builder().instanceLifecycle(LIFECYCLE).build()

        expect:
        batchHelper.getPrice(ec2Instance) == EXPECTED

        where:
        LIFECYCLE                       | EXPECTED
        InstanceLifecycleType.SPOT      | PriceModel.spot
        InstanceLifecycleType.SCHEDULED | PriceModel.standard
        null                            | PriceModel.standard  // on-demand instances return null
    }

    /**
     * The instance type string given to the builder must be returned unchanged
     * by the helper for each value in the table.
     */
    def 'should preserve raw aws instance type values'() {
        given:
        def batchHelper = new AwsBatchHelper(Mock(BatchClient), null)

        when:
        def reported = batchHelper.getInstanceType(INSTANCE)

        then:
        reported == TYPE

        where:
        TYPE          | _
        'm4.large'    | _
        'r8id.xlarge' | _
        and:
        INSTANCE = Instance.builder().instanceType(InstanceType.fromValue(TYPE)).instanceType(TYPE).build()
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import software.amazon.awssdk.services.batch.BatchClient
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobDefinitionsRequest
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobDefinitionsResponse
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobsRequest
|
||||
import software.amazon.awssdk.services.batch.model.DescribeJobsResponse
|
||||
import nextflow.util.ThrottlingExecutor
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsBatchProxyTest extends Specification {

    /**
     * Reading the wrapped client, by property access or by getter, returns the
     * client passed to the constructor without any call on the executor.
     */
    def 'should get client instance' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)

        when:
        def viaProperty = new AwsBatchProxy(batchClient, throttler).client
        then:
        0 * throttler._
        viaProperty == batchClient

        when:
        def viaGetter = new AwsBatchProxy(batchClient, throttler).getClient()
        then:
        0 * throttler._
        viaGetter == batchClient
    }

    /**
     * describeJobDefinitions must be routed through the executor exactly once
     * with priority byte 0, and the executor's answer returned to the caller.
     */
    def 'should invoke executor with normal priority' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)
        and:
        def request = DescribeJobDefinitionsRequest.builder().build() as DescribeJobDefinitionsRequest
        def response = DescribeJobDefinitionsResponse.builder().build()
        def normalPriority = 0 as byte

        when:
        def outcome = new AwsBatchProxy(batchClient, throttler).describeJobDefinitions(request)

        then:
        1 * throttler.doInvoke1(batchClient, 'describeJobDefinitions', [request] as Object[], normalPriority) >> response
        and:
        outcome == response
    }

    /**
     * describeJobs must be routed through the executor exactly once with
     * priority byte 10, and the executor's answer returned to the caller.
     */
    def 'should invoke executor with higher priority' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)
        and:
        def request = DescribeJobsRequest.builder().build() as DescribeJobsRequest
        def response = DescribeJobsResponse.builder().build()
        def highPriority = 10 as byte

        when:
        def outcome = new AwsBatchProxy(batchClient, throttler).describeJobs(request)

        then:
        1 * throttler.doInvoke1(batchClient, 'describeJobs', [request] as Object[], highPriority) >> response
        and:
        outcome == response
    }

}
|
||||
@@ -0,0 +1,677 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import java.nio.file.FileSystems
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
|
||||
import nextflow.Session
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import nextflow.container.DockerConfig
|
||||
import nextflow.processor.TaskBean
|
||||
import nextflow.util.Duration
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsBatchScriptLauncherTest extends Specification {
|
||||
|
||||
/**
 * Spock fixture method: instantiates a Session; the instance itself is not retained.
 */
def setup() {
    new Session()
}
|
||||
|
||||
// Builds the launcher binding for a task that has stdin input, using an AwsConfig with
// region 'eu-west-1', cliPath '/conda/bin/aws' and retryMode 'built-in', then checks the
// unstage_controls, launch_cmd, unstage_outputs and helpers_script entries of the binding.
def 'test bash wrapper with input'() {
|
||||
/*
|
||||
* simple bash run
|
||||
*/
|
||||
when:
|
||||
def cfg = new AwsConfig(region: 'eu-west-1', batch: [cliPath:'/conda/bin/aws', retryMode: 'built-in'])
|
||||
def opts = new AwsOptions(awsConfig: cfg)
|
||||
def binding = new AwsBatchScriptLauncher([
|
||||
name: 'Hello 1',
|
||||
workDir: Paths.get('/work/dir'),
|
||||
script: 'echo Hello world!',
|
||||
environment: [FOO: 1, BAR:'any'],
|
||||
input: 'Ciao ciao' ] as TaskBean, opts) .makeBinding()
|
||||
|
||||
then:
|
||||
binding.unstage_controls == '''\
|
||||
nxf_s3_upload .command.out s3://work/dir || true
|
||||
nxf_s3_upload .command.err s3://work/dir || true
|
||||
'''.stripIndent()
|
||||
|
||||
binding.launch_cmd == '/bin/bash -ue .command.sh < .command.in'
|
||||
binding.unstage_outputs == null
|
||||
|
||||
// the expected aws commands below carry both the configured CLI path and the --region option
binding.helpers_script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=5
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(/conda/bin/aws --region eu-west-1 s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Checks binding.task_env when AwsOptions has remoteBinDir '/bucket/bin' and the task
// environment contains PATH and FOO: the expected text downloads the bin dir, marks it
// executable, prepends it to PATH and exports FOO only (no export of the task's PATH).
def 'should create task environment' () {
|
||||
/*
|
||||
* task environment with a remote bin directory
|
||||
*/
|
||||
when:
|
||||
def bucket = Paths.get('/bucket/work')
|
||||
def opts = new AwsOptions(remoteBinDir: '/bucket/bin', awsConfig: new AwsConfig([:]))
|
||||
|
||||
def binding = new AwsBatchScriptLauncher([
|
||||
name: 'Hello 1',
|
||||
workDir: bucket,
|
||||
targetDir: bucket,
|
||||
environment: [PATH:'/this:/that', FOO: 'xxx'],
|
||||
script: 'echo Hello world!' ] as TaskBean, opts) .makeBinding()
|
||||
|
||||
then:
|
||||
binding.task_env == '''\
|
||||
aws s3 cp --recursive --only-show-errors s3://bucket/bin $PWD/nextflow-bin
|
||||
chmod +x $PWD/nextflow-bin/* || true
|
||||
export PATH=$PWD/nextflow-bin:$PATH
|
||||
export FOO="xxx"
|
||||
'''.stripIndent()
|
||||
}
|
||||
|
||||
/**
 * The binding's cleanup command must remove the scratch directory and
 * tolerate a failing `rm`.
 */
def 'should cleanup temp files' () {
    given:
    def workPath = Paths.get('/bucket/work')
    def awsOptions = new AwsOptions(remoteBinDir: '/bucket/bin', awsConfig: new AwsConfig([:]))
    def taskBean = [
            name: 'Hello 1',
            workDir: workPath,
            targetDir: workPath,
            environment: [PATH:'/this:/that', FOO: 'xxx'],
            script: 'echo Hello world!' ] as TaskBean

    when:
    def binding = new AwsBatchScriptLauncher(taskBean, awsOptions).makeBinding()

    then:
    binding.cleanup_cmd == 'rm -rf $NXF_SCRATCH || true\n'
}
|
||||
|
||||
// Builds the launcher binding for a task with statsEnabled, two output files and stdin
// input (retryMode 'built-in', no cliPath or region), then checks unstage_controls,
// stage_inputs, unstage_outputs, launch_cmd, task_env and helpers_script.
def 'test bash wrapper with outputs and stats'() {
|
||||
|
||||
/*
|
||||
* simple bash run
|
||||
*/
|
||||
when:
|
||||
def bucket = Paths.get('/bucket/work')
|
||||
def opts = new AwsOptions(awsConfig: new AwsConfig(batch: [retryMode: 'built-in']))
|
||||
|
||||
def binding = new AwsBatchScriptLauncher([
|
||||
name: 'Hello 1',
|
||||
workDir: bucket,
|
||||
targetDir: bucket,
|
||||
statsEnabled: true,
|
||||
outputFiles: ['foo.txt', 'bar.fastq'],
|
||||
script: 'echo Hello world!',
|
||||
input: 'Ciao ciao' ] as TaskBean, opts) .makeBinding()
|
||||
|
||||
then:
|
||||
|
||||
// with stats enabled the trace file is uploaded in addition to .command.out and .command.err
binding.unstage_controls == '''\
|
||||
nxf_s3_upload .command.out s3://bucket/work || true
|
||||
nxf_s3_upload .command.err s3://bucket/work || true
|
||||
nxf_s3_upload .command.trace s3://bucket/work || true
|
||||
'''.stripIndent()
|
||||
|
||||
// every download is expected to be wrapped in nxf_cp_retry
binding.stage_inputs == '''\
|
||||
# stage input files
|
||||
downloads=(true)
|
||||
rm -f .command.sh
|
||||
rm -f .command.run
|
||||
rm -f .command.in
|
||||
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.sh .command.sh")
|
||||
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.run .command.run")
|
||||
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.in .command.in")
|
||||
nxf_parallel "${downloads[@]}"
|
||||
'''.stripIndent()
|
||||
|
||||
binding.unstage_outputs == '''
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d foo.txt bar.fastq" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://bucket/work")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''.stripIndent().leftTrim()
|
||||
|
||||
binding.launch_cmd == '/bin/bash .command.run nxf_trace'
|
||||
|
||||
binding.task_env == ''
|
||||
|
||||
binding.helpers_script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=5
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Building the launcher with a custom scratch path must write both wrapper files
 * into the work dir and reference that path in the NXF_SCRATCH assignment.
 */
def 'test bash wrapper with custom scratch'() {
    given:
    def folder = Files.createTempDirectory('test')

    when:
    def awsOptions = new AwsOptions(awsConfig: new AwsConfig(aws:[batch:[cliPath:'/conda/bin/aws', region: 'eu-west-1']]))
    def taskBean = [
            name: 'Hello 1',
            workDir: folder,
            script: 'echo Hello world!',
            scratch: '/foo/bar/tmp'
    ] as TaskBean
    new AwsBatchScriptLauncher(taskBean, awsOptions).build()

    then:
    Files.exists(folder.resolve('.command.sh'))
    Files.exists(folder.resolve('.command.run'))
    and:
    folder.resolve('.command.run').text.contains('NXF_SCRATCH="$(set +u; nxf_mktemp /foo/bar/tmp)"')

    cleanup:
    folder?.deleteDir()
}
|
||||
|
||||
/**
 * Building the launcher with `scratch: false` must write both wrapper files
 * into the work dir and leave NXF_SCRATCH assigned to an empty string.
 */
def 'test should disable scratch'() {
    given:
    def folder = Files.createTempDirectory('test')

    when:
    def awsOptions = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath:'/conda/bin/aws'], region: 'eu-west-1'))
    def taskBean = [
            name: 'Hello 1',
            workDir: folder,
            script: 'echo Hello world!',
            scratch: false
    ] as TaskBean
    new AwsBatchScriptLauncher(taskBean, awsOptions).build()

    then:
    Files.exists(folder.resolve('.command.sh'))
    Files.exists(folder.resolve('.command.run'))
    and:
    folder.resolve('.command.run').text.contains("NXF_SCRATCH=''")

    cleanup:
    folder?.deleteDir()
}
|
||||
|
||||
// Builds the launcher binding with maxTransferAttempts 3, delayBetweenAttempts '9 sec' and
// retryMode 'built-in', then checks that stage_inputs wraps the download in nxf_cp_retry
// and that helpers_script carries max_attempts=3 and timeout=9.
def 'test download retry enabled'() {
|
||||
|
||||
/*
|
||||
* built-in retry with custom attempts and delay
|
||||
*/
|
||||
when:
|
||||
def bucket = Paths.get('/bucket/work')
|
||||
def cfg = new AwsConfig(batch: [maxTransferAttempts:3, delayBetweenAttempts: '9 sec' as Duration, retryMode: 'built-in'])
|
||||
def opts = new AwsOptions(awsConfig: cfg)
|
||||
|
||||
def binding = new AwsBatchScriptLauncher([
|
||||
name: 'Hello 1',
|
||||
workDir: bucket,
|
||||
// targetDir: bucket,
|
||||
script: 'echo Hello world!',
|
||||
] as TaskBean, opts) .makeBinding()
|
||||
|
||||
then:
|
||||
|
||||
binding.stage_inputs == '''\
|
||||
# stage input files
|
||||
downloads=(true)
|
||||
rm -f .command.sh
|
||||
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.sh .command.sh")
|
||||
nxf_parallel "${downloads[@]}"
|
||||
'''.stripIndent()
|
||||
|
||||
binding.helpers_script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=3
|
||||
local timeout=9
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
|
||||
}
|
||||
|
||||
// Builds the launcher binding with retryMode 'adaptive' and maxTransferAttempts 3, then
// checks that stage_inputs calls nxf_s3_download without the nxf_cp_retry wrapper and that
// helpers_script exports AWS_RETRY_MODE and AWS_MAX_ATTEMPTS before the aws helpers.
def 'should aws cli native retry'() {
|
||||
/*
|
||||
* retry delegated to the aws cli via environment variables
|
||||
*/
|
||||
when:
|
||||
def bucket = Paths.get('/bucket/work')
|
||||
def cfg = new AwsConfig(batch: [maxTransferAttempts: 3, retryMode: 'adaptive', delayBetweenAttempts: '9 sec' as Duration])
|
||||
def opts = new AwsOptions(awsConfig: cfg)
|
||||
|
||||
def binding = new AwsBatchScriptLauncher([
|
||||
name: 'Hello 1',
|
||||
workDir: bucket,
|
||||
// targetDir: bucket,
|
||||
script: 'echo Hello world!',
|
||||
] as TaskBean, opts) .makeBinding()
|
||||
|
||||
then:
|
||||
|
||||
binding.stage_inputs == '''\
|
||||
# stage input files
|
||||
downloads=(true)
|
||||
rm -f .command.sh
|
||||
downloads+=("nxf_s3_download s3://bucket/work/.command.sh .command.sh")
|
||||
nxf_parallel "${downloads[@]}"
|
||||
'''.stripIndent()
|
||||
|
||||
// nxf_cp_retry is still expected in the helpers text even though stage_inputs does not use it
binding.helpers_script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=3
|
||||
local timeout=9
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=adaptive
|
||||
export AWS_MAX_ATTEMPTS=3
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * With a DockerConfig that has `fixOwnership: true`, the binding must expose the
 * chown command for the task work directory under `fix_ownership`.
 */
def 'should include fix ownership command' () {
    given:
    def awsConfig = new AwsConfig(batch: [cliPath:'/conda/bin/aws'], region: 'eu-west-1')
    def awsOptions = new AwsOptions(awsConfig: awsConfig)
    def taskBean = [
            name: 'Hello 1',
            workDir: Paths.get('/work/dir'),
            script: 'echo Hello world!',
            containerConfig: new DockerConfig(fixOwnership: true),
            input: 'Ciao ciao' ] as TaskBean
    def launcher = new AwsBatchScriptLauncher(taskBean, awsOptions)

    expect:
    launcher.makeBinding().fix_ownership == '[ ${NXF_OWNER:=\'\'} ] && (shopt -s extglob; GLOBIGNORE=\'..\'; chown -fR --from root $NXF_OWNER /work/dir/{*,.*}) || true'
}
|
||||
|
||||
// Pushes NXF_WRAPPER_STAGE_FILE_THRESHOLD='100' into SysEnv, builds the binding for a task
// with two S3 input files and an S3 work dir, and expects binding.stage_inputs to contain
// the full staging commands. The environment override is popped again in cleanup.
// NOTE(review): presumably the inline commands fit under the threshold, so no separate
// stage file is produced -- confirm against the launcher implementation.
def 'should not create separate stage script' () {
|
||||
given:
|
||||
SysEnv.push([NXF_WRAPPER_STAGE_FILE_THRESHOLD: '100'])
|
||||
and:
|
||||
def workDir = S3PathFactory.parse('s3://my-bucket/work')
|
||||
and:
|
||||
def inputFiles = [
|
||||
'sample_1.fq': Paths.get('/my-bucket/data/sample_1.fq'),
|
||||
'sample_2.fq': Paths.get('/my-bucket/data/sample_2.fq'),
|
||||
]
|
||||
// default AwsOptions are used below; the expected downloads carry no nxf_cp_retry prefix
def stageScript = '''\
|
||||
# stage input files
|
||||
downloads=(true)
|
||||
rm -f sample_1.fq
|
||||
rm -f sample_2.fq
|
||||
rm -f .command.sh
|
||||
downloads+=("nxf_s3_download s3://my-bucket/data/sample_1.fq sample_1.fq")
|
||||
downloads+=("nxf_s3_download s3://my-bucket/data/sample_2.fq sample_2.fq")
|
||||
downloads+=("nxf_s3_download s3://my-bucket/work/.command.sh .command.sh")
|
||||
nxf_parallel "${downloads[@]}"
|
||||
'''.stripIndent()
|
||||
and:
|
||||
def bean = [
|
||||
workDir: workDir,
|
||||
targetDir: workDir,
|
||||
inputFiles: inputFiles,
|
||||
script: 'echo Hello world!'
|
||||
] as TaskBean
|
||||
def opts = new AwsOptions()
|
||||
def builder = new AwsBatchScriptLauncher(bean, opts)
|
||||
|
||||
when:
|
||||
def binding = builder.makeBinding()
|
||||
then:
|
||||
binding.stage_inputs == stageScript
|
||||
|
||||
cleanup:
|
||||
SysEnv.pop()
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import nextflow.util.CmdLineHelper
|
||||
import software.amazon.awssdk.services.batch.model.Tmpfs
|
||||
import software.amazon.awssdk.services.batch.model.Ulimit
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
* @author Manuele Simi <manuele.simi@gmail.com>
|
||||
*/
|
||||
class AwsContainerOptionsMapperTest extends Specification {
|
||||
|
||||
/**
 * Three `--env` / `-e` options must produce three environment entries, in the
 * order asserted below; a variable given without `=value` has a null value.
 */
def 'should set env vars'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--env VAR_FOO -e VAR_FOO2=value2 --env VAR_FOO3=value3')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)
    def envVars = containerProps.environment

    expect:
    envVars.size() == 3
    and:
    envVars.get(0).name() == 'VAR_FOO'
    envVars.get(0).value() == null
    and:
    envVars.get(1).name() == 'VAR_FOO3'
    envVars.get(1).value() == 'value3'
    and:
    envVars.get(2).name() == 'VAR_FOO2'
    envVars.get(2).value() == 'value2'
}
|
||||
|
||||
/**
 * Each `--ulimit name=soft:hard` option must become one Ulimit entry carrying
 * the matching name, soft limit and hard limit.
 */
def 'should set ulimits'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--ulimit nofile=1280:2560 --ulimit nproc=16:32')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)
    and:
    def expectedNofile = Ulimit.builder().hardLimit(2560).name('nofile').softLimit(1280).build()
    def expectedNproc = Ulimit.builder().hardLimit(32).name('nproc').softLimit(16).build()

    expect:
    containerProps.ulimits.size() == 2
    containerProps.ulimits.get(0) == expectedNofile
    containerProps.ulimits.get(1) == expectedNproc
}
|
||||
|
||||
/**
 * The `--user` option must be copied to the `user` container property.
 */
def 'should set user'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--user nf-user')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)

    expect:
    containerProps.user == 'nf-user'
}
|
||||
|
||||
/**
 * The `--privileged` flag must make the `privileged` container property truthy.
 */
def 'should set privileged'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--privileged')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)

    expect:
    containerProps.privileged
}
|
||||
|
||||
/**
 * The `--read-only` flag must make the `readonlyRootFilesystem` property truthy.
 */
def 'should set readonly'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--read-only')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)

    expect:
    containerProps.readonlyRootFilesystem
}
|
||||
|
||||
/**
 * A single `-e name=value` option must yield a first environment entry with
 * that name and value.
 */
def 'should set env'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('-e x=y')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)

    expect:
    containerProps.environment.get(0).name()=='x'
    containerProps.environment.get(0).value()=='y'
}
|
||||
|
||||
/**
 * Each `--tmpfs path:opts,size=N` option must become a Tmpfs entry whose container
 * path, size and remaining mount options match the option text.
 */
def 'should set tmpfs linux params'() {
    given:
    def cliArgs = CmdLineHelper.parseGnuArgs('--tmpfs /run:rw,noexec,nosuid,size=64 --tmpfs /app:ro,size=128')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(cliArgs)
    and:
    def expectedRun = Tmpfs.builder().containerPath('/run').size(64).mountOptions(['rw', 'noexec', 'nosuid']).build()
    def expectedApp = Tmpfs.builder().containerPath('/app').size(128).mountOptions(['ro']).build()

    expect:
    containerProps.linuxParameters.tmpfs().get(0) == expectedRun
    containerProps.linuxParameters.tmpfs().get(1) == expectedApp
}
|
||||
|
||||
def 'should set memory swap '() {
    // `--memory-swap` is expected to surface as `maxSwap` on the Linux parameters.
    when: 'the option is parsed and mapped'
    def swapArgs = CmdLineHelper.parseGnuArgs('--memory-swap 2048')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(swapArgs)

    then: 'the numeric value is carried over unchanged'
    containerProps.linuxParameters.maxSwap() == 2048
}
|
||||
|
||||
def 'should set shared memory size'() {
    // Pins the result for a unit-less `--shm-size`: 12048024 is expected to become 11
    // (presumably bytes truncated to whole MiB -- confirm against the mapper implementation).
    when: 'a unit-less size is parsed and mapped'
    def shmArgs = CmdLineHelper.parseGnuArgs('--shm-size 12048024')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(shmArgs)

    then:
    containerProps.linuxParameters.sharedMemorySize() == 11
}
|
||||
|
||||
def 'should set shared memory size with unit in MiB'() {
    when: 'a size with the `m` suffix is parsed and mapped'
    def shmArgs = CmdLineHelper.parseGnuArgs('--shm-size 256m')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(shmArgs)

    then: 'the shared memory size is reported as 256'
    containerProps.linuxParameters.sharedMemorySize() == 256
}
|
||||
|
||||
def 'should set shared memory size with unit in GiB'() {
    when: 'a size with the `g` suffix is parsed and mapped'
    def shmArgs = CmdLineHelper.parseGnuArgs('--shm-size 1g')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(shmArgs)

    then: 'the shared memory size is reported as 1024'
    containerProps.linuxParameters.sharedMemorySize() == 1024
}
|
||||
|
||||
def 'should set memory swappiness'() {
    // `--memory-swappiness` is expected to surface as `swappiness` on the Linux parameters.
    when: 'the option is parsed and mapped'
    def swappinessArgs = CmdLineHelper.parseGnuArgs('--memory-swappiness 12048024')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(swappinessArgs)

    then: 'the numeric value is carried over unchanged'
    containerProps.linuxParameters.swappiness() == 12048024
}
|
||||
|
||||
def 'should set init'() {
    // The bare `--init` flag must enable the init process on the Linux parameters.
    when: 'the flag is parsed and mapped'
    def initArgs = CmdLineHelper.parseGnuArgs('--init')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(initArgs)

    then:
    containerProps.linuxParameters.initProcessEnabled()
}
|
||||
|
||||
def 'should set no params'() {
    when: 'an empty option string is parsed and mapped'
    def noArgs = CmdLineHelper.parseGnuArgs('')
    def containerProps = AwsContainerOptionsMapper.createContainerProperties(noArgs)

    then: 'no Linux parameters and no ulimits are created'
    containerProps.linuxParameters == null
    containerProps.ulimits == null

    and: 'both boolean flags stay false'
    containerProps.privileged == false
    containerProps.readonlyRootFilesystem == false

    and: 'no user is set'
    containerProps.user == null
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import nextflow.exception.ProcessUnrecoverableException
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsOptionsTest extends Specification {

    def 'should return aws cli' () {
        when: 'neither a CLI path nor a region is configured'
        def plain = new AwsOptions(awsConfig: new AwsConfig([:]))
        then: 'the bare command name is used'
        plain.awsCli == 'aws'

        when: 'only a custom CLI path is configured'
        def customPath = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/bin/aws']))
        then: 'that path is returned as is'
        customPath.awsCli == '/foo/bin/aws'

        when: 'a region is configured next to the CLI path'
        def withRegion = new AwsOptions(awsConfig: new AwsConfig(region: 'eu-west-1', batch: [cliPath: '/foo/bin/aws']))
        then: 'the region is appended as a `--region` argument'
        withRegion.awsCli == '/foo/bin/aws --region eu-west-1'
    }

    def 'should get max connection' () {
        given: 'a session whose config sets maxParallelTransfers to 5'
        def session = Mock(Session)
        session.getConfig() >> [aws: [batch: [maxParallelTransfers: 5]]]

        when: 'options are built from an empty AWS config'
        def fromEmptyConfig = new AwsOptions(awsConfig: new AwsConfig([:]))
        then: 'the value equals AwsOptions.MAX_TRANSFER'
        fromEmptyConfig.maxParallelTransfers == AwsOptions.MAX_TRANSFER

        when: 'options are built from the session'
        def fromSession = new AwsOptions(session)
        then: 'the configured value is picked up'
        fromSession.maxParallelTransfers == 5
    }

    def 'should get aws options' () {
        given: 'the batch and client sections of the AWS config'
        def batchSettings = [
                cliPath: '/foo/bin/aws',
                maxParallelTransfers: 5,
                maxTransferAttempts: 3,
                delayBetweenAttempts: '9 sec',
                jobRole: 'aws::foo::bar',
                volumes: '/foo,/this:/that']
        def clientSettings = [
                uploadStorageClass: 'STANDARD',
                storageEncryption: 'AES256']

        and: 'a session exposing them together with a region'
        def session = Mock(Session) {
            getConfig() >> [aws: [batch: batchSettings, client: clientSettings, region: 'aws-west-2']]
        }

        and: 'an executor bound to that session with a remote bin directory'
        def executor = Mock(AwsBatchExecutor) {
            getSession() >> session
            getRemoteBinDir() >> Paths.get('/remote/bin/path')
        }

        when: 'options are built from the session'
        def fromSession = new AwsOptions(session)
        then: 'every configured setting is reflected'
        fromSession.maxParallelTransfers == 5
        fromSession.maxTransferAttempts == 3
        fromSession.delayBetweenAttempts.seconds == 9
        fromSession.storageClass == 'STANDARD'
        fromSession.storageEncryption == 'AES256'
        fromSession.region == 'aws-west-2'
        fromSession.jobRole == 'aws::foo::bar'
        fromSession.volumes == ['/foo', '/this:/that']

        when: 'options are built from the executor'
        def fromExecutor = new AwsOptions(executor)
        then: 'the remote bin directory is exposed as a string'
        fromExecutor.remoteBinDir == '/remote/bin/path'
    }

    def 'should set aws kms key' () {
        when: 'only a KMS key id is configured'
        def keyOnlySession = Mock(Session) {
            getConfig() >> [aws: [client: [storageKmsKeyId: 'my-kms-key']]]
        }
        and:
        def keyOnly = new AwsOptions(keyOnlySession)
        then: 'the key is reported and no encryption type is implied'
        keyOnly.storageKmsKeyId == 'my-kms-key'
        keyOnly.storageEncryption == null

        when: 'a KMS key id and an explicit encryption type are configured'
        def keyAndTypeSession = Mock(Session) {
            getConfig() >> [aws: [client: [storageKmsKeyId: 'my-kms-key', storageEncryption: 'aws:kms']]]
        }
        and:
        def keyAndType = new AwsOptions(keyAndTypeSession)
        then: 'both values are reported; an explicit `storageEncryption` is allowed'
        keyAndType.storageKmsKeyId == 'my-kms-key'
        keyAndType.storageEncryption == 'aws:kms'
    }

    @Unroll
    def 'should return aws options'() {
        given: 'a real session built from the data-table values'
        def clientSection = [uploadStorageClass: awsStorClass, storageEncryption: awsStorEncrypt]
        def batchSection = [cliPath: awscliPath]
        def session = new Session([aws: [client: clientSection, batch: batchSection]])

        when:
        def options = new AwsOptions(session)

        then: 'each value is returned exactly as configured'
        options.cliPath == awscliPath
        options.storageClass == awsStorClass
        options.storageEncryption == awsStorEncrypt

        where:
        awscliPath      | awsStorClass | awsStorEncrypt
        null            | null         | null
        '/foo/bin/aws'  | 'STANDARD'   | 'AES256'
    }

    def 'should validate aws options' () {
        when: 'nothing is configured'
        def unset = new AwsOptions(awsConfig: new AwsConfig([:]))
        then: 'all three getters return null'
        unset.getCliPath() == null
        unset.getStorageClass() == null
        unset.getStorageEncryption() == null

        when: 'CLI path, storage class and encryption are configured'
        def configured = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/bin/aws'], client: [storageClass: 'STANDARD', storageEncryption: 'AES256']))
        then: 'the getters return the configured values'
        configured.getCliPath() == '/foo/bin/aws'
        configured.getStorageClass() == 'STANDARD'
        configured.getStorageEncryption() == 'AES256'

        when: 'the storage class is the value `foo`'
        def oddStorageClass = new AwsOptions(awsConfig: new AwsConfig(client: [storageClass: 'foo']))
        then: 'it resolves to null'
        oddStorageClass.getStorageClass() == null

        when: 'the storage encryption is the value `abr`'
        def oddEncryption = new AwsOptions(awsConfig: new AwsConfig(client: [storageEncryption: 'abr']))
        then: 'it resolves to null'
        oddEncryption.getStorageEncryption() == null

        when: 'a KMS key ARN is configured'
        def withKmsKey = new AwsOptions(awsConfig: new AwsConfig(client: [storageKmsKeyId: 'arn:aws:kms:eu-west-1:1234567890:key/e97ecf28-951e-4700-bf22-1bd416ec519f']))
        then: 'the ARN is returned unchanged'
        withKmsKey.getStorageKmsKeyId() == 'arn:aws:kms:eu-west-1:1234567890:key/e97ecf28-951e-4700-bf22-1bd416ec519f'

        // NOTE(review): presumably rejected because the path is relative -- confirm in AwsOptions
        when: 'the CLI path is `bin/aws`'
        new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: 'bin/aws']))
        then:
        thrown(ProcessUnrecoverableException)

        // NOTE(review): presumably rejected because the path does not end in `bin/aws` -- confirm in AwsOptions
        when: 'the CLI path is `/foo/aws`'
        new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/aws']))
        then:
        thrown(ProcessUnrecoverableException)
    }

    def 'should add a volume' () {
        given: 'options without any volume'
        def options = new AwsOptions(awsConfig: new AwsConfig([:]))

        when: 'one directory is added'
        options.addVolume(Paths.get('/some/dir'))
        then:
        options.volumes == ['/some/dir']

        when: 'another directory is added twice'
        2.times { options.addVolume(Paths.get('/other/dir')) }
        then: 'it is listed only once, after the first directory'
        options.volumes == ['/some/dir', '/other/dir']
    }

    @Unroll
    def 'should get aws cli path' () {
        given: 'a real session built from the data-table config'
        def session = new Session(CONFIG)

        when:
        def options = new AwsOptions(session)

        then: 'the expected AWS CLI path and s5cmd path are reported'
        options.cliPath == S3CLI_PATH
        options.s5cmdPath == S5CMD_PATH

        where:
        CONFIG                                                                      | S3CLI_PATH     | S5CMD_PATH
        [aws:[batch:[:]]]                                                           | null           | null
        [aws:[batch:[cliPath: '/usr/bin/aws']]]                                     | '/usr/bin/aws' | null
        [aws:[batch:[cliPath: 's5cmd']]]                                            | null           | null
        [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd']]]                   | null           | 's5cmd'
        [aws:[batch:[platformType: 'fargate', cliPath: '/some/path/s5cmd']]]        | null           | '/some/path/s5cmd'
        [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd --foo']]]             | null           | 's5cmd --foo'
        [aws:[batch:[platformType: 'fargate', cliPath: '/some/path/s5cmd --foo']]]  | null           | '/some/path/s5cmd --foo'
    }

    def 'should parse s3 acl' ( ) {
        when: 'the ACL is given in camel case'
        def camelCase = new AwsOptions(new Session(aws:[client:[s3Acl: 'PublicRead']]))
        then:
        camelCase.getS3Acl() == ObjectCannedACL.PUBLIC_READ

        when: 'the ACL is given in hyphenated form'
        def hyphenated = new AwsOptions(new Session(aws:[client:[s3Acl: 'public-read']]))
        then: 'it resolves to the same canned ACL'
        hyphenated.getS3Acl() == ObjectCannedACL.PUBLIC_READ

        when: 'the ACL is the value `unknown`'
        new AwsOptions(new Session(aws:[client:[s3Acl: 'unknown']]))
        then:
        thrown(IllegalArgumentException)
    }

}
|
||||
@@ -0,0 +1,612 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch.model
|
||||
|
||||
import software.amazon.awssdk.services.batch.model.ContainerProperties
|
||||
import software.amazon.awssdk.services.batch.model.EphemeralStorage
|
||||
import software.amazon.awssdk.services.batch.model.KeyValuePair
|
||||
import software.amazon.awssdk.services.batch.model.LinuxParameters
|
||||
import software.amazon.awssdk.services.batch.model.LogConfiguration
|
||||
import software.amazon.awssdk.services.batch.model.MountPoint
|
||||
import software.amazon.awssdk.services.batch.model.NetworkConfiguration
|
||||
import software.amazon.awssdk.services.batch.model.ResourceRequirement
|
||||
import software.amazon.awssdk.services.batch.model.ResourceType
|
||||
import software.amazon.awssdk.services.batch.model.RuntimePlatform
|
||||
import software.amazon.awssdk.services.batch.model.Ulimit
|
||||
import software.amazon.awssdk.services.batch.model.Volume
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
* @author Nextflow Authors
|
||||
*/
|
||||
class ContainerPropertiesModelTest extends Specification {

    // ---- fixture helpers: each wraps one builder chain that the features below repeat ----

    /** Builds a resource requirement with the given type and value. */
    private static ResourceRequirement requirement(ResourceType kind, String amount) {
        return ResourceRequirement.builder()
                .type(kind)
                .value(amount)
                .build()
    }

    /** Builds a name/value pair for the container environment. */
    private static KeyValuePair envVar(String key, String val) {
        return KeyValuePair.builder().name(key).value(val).build()
    }

    /** Builds a ulimit with the given name, soft limit and hard limit. */
    private static Ulimit ulimit(String limitName, int soft, int hard) {
        return Ulimit.builder().name(limitName).softLimit(soft).hardLimit(hard).build()
    }

    def 'should create empty model'() {
        when: 'a model is created and nothing is set on it'
        def blank = new ContainerPropertiesModel()

        then: 'object-valued properties are null and both boolean flags are false'
        blank.image == null
        blank.command == null
        blank.resourceRequirements == null
        blank.jobRoleArn == null
        blank.executionRoleArn == null
        blank.linuxParameters == null
        blank.environment == null
        blank.privileged == false
        blank.user == null
        blank.readonlyRootFilesystem == false
        blank.ulimits == null
        blank.logConfiguration == null
        blank.mountPoints == null
        blank.volumes == null
        blank.networkConfiguration == null
        blank.ephemeralStorage == null
        blank.runtimePlatform == null
    }

    def 'should set and get image'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        def returned = props.image('ubuntu:20.04')

        then: 'the setter returns the model and the image can be read back'
        returned == props
        props.image == 'ubuntu:20.04'
    }

    def 'should set and get command'() {
        given:
        def props = new ContainerPropertiesModel()

        when: 'the command is passed as separate arguments'
        def returned = props.command('echo', 'hello', 'world')

        then: 'the arguments are stored as a three-element list in the same order'
        returned == props
        props.command == ['echo', 'hello', 'world']
        props.command.size() == 3
    }

    def 'should set and get resource requirements'() {
        given:
        def props = new ContainerPropertiesModel()
        def cpu = requirement(ResourceType.VCPU, '1')
        def memory = requirement(ResourceType.MEMORY, '1024')

        when:
        def returned = props.resourceRequirements(cpu, memory)

        then: 'both requirements are stored in the order given'
        returned == props
        props.resourceRequirements.size() == 2
        props.resourceRequirements[0] == cpu
        props.resourceRequirements[1] == memory
    }

    def 'should set and get job role arn'() {
        given:
        def props = new ContainerPropertiesModel()
        def roleArn = 'arn:aws:iam::123456789012:role/BatchJobRole'

        when:
        def returned = props.jobRoleArn(roleArn)

        then:
        returned == props
        props.jobRoleArn == roleArn
    }

    def 'should set and get execution role arn'() {
        given:
        def props = new ContainerPropertiesModel()
        def roleArn = 'arn:aws:iam::123456789012:role/BatchExecutionRole'

        when:
        def returned = props.executionRoleArn(roleArn)

        then:
        returned == props
        props.executionRoleArn == roleArn
    }

    def 'should set and get user'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        def returned = props.user('batch-user')

        then:
        returned == props
        props.user == 'batch-user'
    }

    def 'should set and get readonly root filesystem'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        def returned = props.readonlyRootFilesystem(true)

        then:
        returned == props
        props.readonlyRootFilesystem == true
    }

    def 'should set and get environment'() {
        given:
        def props = new ContainerPropertiesModel()
        def variables = [
                envVar('VAR1', 'value1'),
                envVar('VAR2', 'value2')
        ] as ArrayList<KeyValuePair>

        when:
        def returned = props.environment(variables)

        then: 'the list is stored as given'
        returned == props
        props.environment == variables
        props.environment.size() == 2

        and: 'each entry keeps its name and value'
        props.environment[0].name() == 'VAR1'
        props.environment[0].value() == 'value1'
        props.environment[1].name() == 'VAR2'
        props.environment[1].value() == 'value2'
    }

    def 'should set and get linux parameters'() {
        given:
        def props = new ContainerPropertiesModel()
        def linuxSettings = LinuxParameters.builder().initProcessEnabled(true).build()

        when:
        def returned = props.linuxParameters(linuxSettings)

        then:
        returned == props
        props.linuxParameters == linuxSettings
    }

    def 'should set and get privileged'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        def returned = props.privileged(true)

        then:
        returned == props
        props.privileged == true
    }

    def 'should set and get ulimits'() {
        given:
        def props = new ContainerPropertiesModel()
        def limits = [
                ulimit('nofile', 1024, 2048),
                ulimit('nproc', 16, 32)
        ] as ArrayList<Ulimit>

        when:
        def returned = props.ulimits(limits)

        then: 'the list is stored as given'
        returned == props
        props.ulimits == limits
        props.ulimits.size() == 2

        and: 'the first entry keeps its name and both limits'
        props.ulimits[0].name() == 'nofile'
        props.ulimits[0].softLimit() == 1024
        props.ulimits[0].hardLimit() == 2048
    }

    def 'should set and get log configuration'() {
        given:
        def props = new ContainerPropertiesModel()
        def logging = LogConfiguration.builder()
                .logDriver('awslogs')
                .options(['awslogs-group': '/aws/batch/job'])
                .build()

        when:
        def returned = props.logConfiguration(logging)

        then:
        returned == props
        props.logConfiguration == logging
    }

    def 'should set and get mount points'() {
        given:
        def props = new ContainerPropertiesModel()
        def tmpMount = MountPoint.builder()
                .sourceVolume('tmp')
                .containerPath('/tmp')
                .readOnly(false)
                .build()
        def mounts = [tmpMount]

        when:
        def returned = props.mountPoints(mounts)

        then: 'the single mount point is stored with all of its fields'
        returned == props
        props.mountPoints == mounts
        props.mountPoints.size() == 1
        props.mountPoints[0].sourceVolume() == 'tmp'
        props.mountPoints[0].containerPath() == '/tmp'
        props.mountPoints[0].readOnly() == false
    }

    def 'should set and get volumes'() {
        given:
        def props = new ContainerPropertiesModel()
        def tmpVolume = Volume.builder().name('tmp').build()
        def volumeList = [tmpVolume]

        when:
        def returned = props.volumes(volumeList)

        then:
        returned == props
        props.volumes == volumeList
        props.volumes.size() == 1
        props.volumes[0].name() == 'tmp'
    }

    def 'should set and get network configuration'() {
        given:
        def props = new ContainerPropertiesModel()
        def network = NetworkConfiguration.builder().assignPublicIp('ENABLED').build()

        when:
        def returned = props.networkConfiguration(network)

        then:
        returned == props
        props.networkConfiguration == network
    }

    def 'should set and get ephemeral storage'() {
        given:
        def props = new ContainerPropertiesModel()
        def scratch = EphemeralStorage.builder().sizeInGiB(20).build()

        when:
        def returned = props.ephemeralStorage(scratch)

        then:
        returned == props
        props.ephemeralStorage == scratch
    }

    def 'should set and get runtime platform'() {
        given:
        def props = new ContainerPropertiesModel()
        def platform = RuntimePlatform.builder()
                .operatingSystemFamily('LINUX')
                .cpuArchitecture('X86_64')
                .build()

        when:
        def returned = props.runtimePlatform(platform)

        then:
        returned == props
        props.runtimePlatform == platform
    }

    def 'should support method chaining'() {
        given:
        def props = new ContainerPropertiesModel()
        def cpu = requirement(ResourceType.VCPU, '1')
        def variables = [envVar('VAR1', 'value1')] as ArrayList<KeyValuePair>

        when: 'nine setters are invoked as one fluent chain'
        def returned = props
                .image('ubuntu:20.04')
                .command('echo', 'hello')
                .resourceRequirements(cpu)
                .jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
                .executionRoleArn('arn:aws:iam::123456789012:role/BatchExecutionRole')
                .user('batch-user')
                .readonlyRootFilesystem(true)
                .environment(variables)
                .privileged(false)

        then: 'the chain ends on the model and every value has been applied'
        returned == props
        props.image == 'ubuntu:20.04'
        props.command == ['echo', 'hello']
        props.resourceRequirements.size() == 1
        props.jobRoleArn == 'arn:aws:iam::123456789012:role/BatchJobRole'
        props.executionRoleArn == 'arn:aws:iam::123456789012:role/BatchExecutionRole'
        props.user == 'batch-user'
        props.readonlyRootFilesystem == true
        props.environment.size() == 1
        props.privileged == false
    }

    def 'should generate proper toString'() {
        given:
        def props = new ContainerPropertiesModel()
        def cpu = requirement(ResourceType.VCPU, '1')

        when: 'a handful of fields are populated'
        props.image('ubuntu:20.04')
                .command('echo', 'hello')
                .resourceRequirements(cpu)
                .jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
                .privileged(true)
                .user('batch-user')

        then: 'the string form names the class and shows each populated field'
        def rendered = props.toString()
        rendered.contains('ContainerPropertiesModel{')
        rendered.contains("image='ubuntu:20.04'")
        rendered.contains('command=[echo, hello]')
        rendered.contains('resourceRequirements=')
        rendered.contains("jobRoleArn='arn:aws:iam::123456789012:role/BatchJobRole'")
        rendered.contains('privileged=true')
        rendered.contains("user='batch-user'")
    }

    def 'should handle null values in toString'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        def rendered = props.toString()

        then: 'unset fields are printed as null and boolean flags as false'
        rendered.contains('ContainerPropertiesModel{')
        rendered.contains("image='null'")
        rendered.contains('command=null')
        rendered.contains('resourceRequirements=null')
        rendered.contains("jobRoleArn='null'")
        rendered.contains('privileged=false')
        rendered.contains("user='null'")
        rendered.contains('readonlyRootFilesystem=false')
    }

    def 'should handle empty collections'() {
        given:
        def props = new ContainerPropertiesModel()

        when: 'every list-valued property is set to an empty list'
        props.environment([] as ArrayList<KeyValuePair>)
                .ulimits([] as ArrayList<Ulimit>)
                .mountPoints([])
                .volumes([])

        then: 'the empty lists are kept'
        props.environment == []
        props.ulimits == []
        props.mountPoints == []
        props.volumes == []
    }

    def 'should handle single command argument'() {
        given:
        def props = new ContainerPropertiesModel()

        when:
        props.command('single-command')

        then: 'the command is a one-element list'
        props.command == ['single-command']
        props.command.size() == 1
    }

    def 'should handle single resource requirement'() {
        given:
        def props = new ContainerPropertiesModel()
        def memory = requirement(ResourceType.MEMORY, '512')

        when:
        props.resourceRequirements(memory)

        then:
        props.resourceRequirements.size() == 1
        props.resourceRequirements[0] == memory
    }

    def 'should handle boolean values correctly'() {
        given:
        def props = new ContainerPropertiesModel()

        when: 'both flags are explicitly switched off'
        props.privileged(false)
                .readonlyRootFilesystem(false)

        then:
        props.privileged == false
        props.readonlyRootFilesystem == false

        when: 'both flags are switched on afterwards'
        props.privileged(true)
                .readonlyRootFilesystem(true)

        then:
        props.privileged == true
        props.readonlyRootFilesystem == true
    }

    def 'should convert to ContainerProperties with all fields'() {
        given: 'one fixture value for every property of the model'
        def props = new ContainerPropertiesModel()
        def cpu = requirement(ResourceType.VCPU, '1')
        def variables = [envVar('VAR1', 'value1')] as ArrayList<KeyValuePair>
        def limits = [ulimit('nofile', 1024, 2048)] as ArrayList<Ulimit>
        def logging = LogConfiguration.builder().logDriver('awslogs').build()
        def mounts = [
                MountPoint.builder().sourceVolume('tmp').containerPath('/tmp').build()
        ]
        def volumeList = [
                Volume.builder().name('tmp').build()
        ]
        def network = NetworkConfiguration.builder().assignPublicIp('ENABLED').build()
        def scratch = EphemeralStorage.builder().sizeInGiB(20).build()
        def platform = RuntimePlatform.builder().operatingSystemFamily('LINUX').build()
        def linuxSettings = LinuxParameters.builder().initProcessEnabled(true).build()

        when: 'the model is fully populated'
        props.image('ubuntu:20.04')
                .command('echo', 'hello')
                .resourceRequirements(cpu)
                .jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
                .executionRoleArn('arn:aws:iam::123456789012:role/BatchExecutionRole')
                .linuxParameters(linuxSettings)
                .environment(variables)
                .privileged(true)
                .user('batch-user')
                .readonlyRootFilesystem(true)
                .ulimits(limits)
                .logConfiguration(logging)
                .mountPoints(mounts)
                .volumes(volumeList)
                .networkConfiguration(network)
                .ephemeralStorage(scratch)
                .runtimePlatform(platform)

        and: 'it is converted to the SDK type'
        def converted = props.toBatchContainerProperties()

        then: 'the result is an SDK ContainerProperties carrying every value'
        converted instanceof ContainerProperties
        converted.image() == 'ubuntu:20.04'
        converted.command() == ['echo', 'hello']
        converted.resourceRequirements().size() == 1
        converted.resourceRequirements()[0] == cpu
        converted.jobRoleArn() == 'arn:aws:iam::123456789012:role/BatchJobRole'
        converted.executionRoleArn() == 'arn:aws:iam::123456789012:role/BatchExecutionRole'
        converted.linuxParameters() == linuxSettings
        converted.environment().size() == 1
        converted.environment()[0].name() == 'VAR1'
        converted.privileged() == true
        converted.user() == 'batch-user'
        converted.readonlyRootFilesystem() == true
        converted.ulimits().size() == 1
        converted.ulimits()[0].name() == 'nofile'
        converted.logConfiguration() == logging
        converted.mountPoints().size() == 1
        converted.mountPoints()[0].sourceVolume() == 'tmp'
        converted.volumes().size() == 1
        converted.volumes()[0].name() == 'tmp'
        converted.networkConfiguration() == network
        converted.ephemeralStorage() == scratch
        converted.runtimePlatform() == platform
    }

    def 'should convert to ContainerProperties with null fields'() {
        given:
        def props = new ContainerPropertiesModel()

        when: 'an untouched model is converted'
        def converted = props.toBatchContainerProperties()

        then: 'scalar and object accessors return null while list accessors return empty lists'
        converted instanceof ContainerProperties
        converted.image() == null
        converted.command() == []
        converted.resourceRequirements() == []
        converted.jobRoleArn() == null
        converted.executionRoleArn() == null
        converted.linuxParameters() == null
        converted.environment() == []
        converted.privileged() == null
        converted.user() == null
        converted.readonlyRootFilesystem() == null
        converted.ulimits() == []
        converted.logConfiguration() == null
        converted.mountPoints() == []
        converted.volumes() == []
        converted.networkConfiguration() == null
        converted.ephemeralStorage() == null
        converted.runtimePlatform() == null
    }

    def 'should convert to ContainerProperties with empty collections'() {
        given:
        def props = new ContainerPropertiesModel()

        when: 'every list-valued property is set to an empty list'
        props.environment([] as ArrayList<KeyValuePair>)
                .ulimits([] as ArrayList<Ulimit>)
                .mountPoints([])
                .volumes([])

        and: 'the model is converted to the SDK type'
        def converted = props.toBatchContainerProperties()

        then: 'the converted lists are empty as well'
        converted instanceof ContainerProperties
        converted.environment() == []
        converted.ulimits() == []
        converted.mountPoints() == []
        converted.volumes() == []
    }
}
|
||||
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.batch.model
|
||||
|
||||
import software.amazon.awssdk.services.batch.model.JobDefinitionType
|
||||
import software.amazon.awssdk.services.batch.model.PlatformCapability
|
||||
import software.amazon.awssdk.services.batch.model.RegisterJobDefinitionRequest
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
* @author Nextflow Authors
|
||||
*/
|
||||
class RegisterJobDefinitionModelTest extends Specification {
|
||||
|
||||
def 'should create empty model'() {
    when: 'a job definition model is created and nothing is set on it'
    def blank = new RegisterJobDefinitionModel()

    then: 'every property is null'
    blank.jobDefinitionName == null
    blank.type == null
    blank.platformCapabilities == null
    blank.containerProperties == null
    blank.parameters == null
    blank.tags == null
}
|
||||
|
||||
def 'should set and get job definition name'() {
    given:
    def jobDef = new RegisterJobDefinitionModel()

    when:
    def returned = jobDef.jobDefinitionName('test-job-def')

    then: 'the setter returns the model and the name can be read back'
    returned == jobDef
    jobDef.jobDefinitionName == 'test-job-def'
}
|
||||
|
||||
def 'should set and get type'() {
    given:
    def jobDef = new RegisterJobDefinitionModel()

    when:
    def returned = jobDef.type(JobDefinitionType.CONTAINER)

    then: 'the setter returns the model and the type can be read back'
    returned == jobDef
    jobDef.type == JobDefinitionType.CONTAINER
}
|
||||
|
||||
def 'should set and get platform capabilities'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def capabilities = [PlatformCapability.EC2, PlatformCapability.FARGATE]
|
||||
|
||||
when:
|
||||
def result = model.platformCapabilities(capabilities)
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.platformCapabilities == capabilities
|
||||
model.platformCapabilities.size() == 2
|
||||
model.platformCapabilities.contains(PlatformCapability.EC2)
|
||||
model.platformCapabilities.contains(PlatformCapability.FARGATE)
|
||||
}
|
||||
|
||||
def 'should set and get container properties'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
|
||||
when:
|
||||
def result = model.containerProperties(containerProps)
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.containerProperties == containerProps
|
||||
}
|
||||
|
||||
def 'should set and get parameters'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def params = ['key1': 'value1', 'key2': 'value2']
|
||||
|
||||
when:
|
||||
def result = model.parameters(params)
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.parameters == params
|
||||
model.parameters.size() == 2
|
||||
model.parameters['key1'] == 'value1'
|
||||
model.parameters['key2'] == 'value2'
|
||||
}
|
||||
|
||||
def 'should set and get tags'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def tags = ['env': 'test', 'project': 'nextflow']
|
||||
|
||||
when:
|
||||
def result = model.tags(tags)
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.tags == tags
|
||||
model.tags.size() == 2
|
||||
model.tags['env'] == 'test'
|
||||
model.tags['project'] == 'nextflow'
|
||||
}
|
||||
|
||||
def 'should add tag entry when tags is null'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
|
||||
when:
|
||||
def result = model.addTagsEntry('key1', 'value1')
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.tags != null
|
||||
model.tags.size() == 1
|
||||
model.tags['key1'] == 'value1'
|
||||
model.tags instanceof LinkedHashMap
|
||||
}
|
||||
|
||||
def 'should add tag entry when tags already exists'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
model.tags(['existing': 'tag'])
|
||||
|
||||
when:
|
||||
def result = model.addTagsEntry('new', 'value')
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.tags.size() == 2
|
||||
model.tags['existing'] == 'tag'
|
||||
model.tags['new'] == 'value'
|
||||
}
|
||||
|
||||
def 'should handle multiple tag entries'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
|
||||
when:
|
||||
model.addTagsEntry('key1', 'value1')
|
||||
.addTagsEntry('key2', 'value2')
|
||||
.addTagsEntry('key3', 'value3')
|
||||
|
||||
then:
|
||||
model.tags.size() == 3
|
||||
model.tags['key1'] == 'value1'
|
||||
model.tags['key2'] == 'value2'
|
||||
model.tags['key3'] == 'value3'
|
||||
}
|
||||
|
||||
def 'should handle tag entry overwrite'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
|
||||
when:
|
||||
model.addTagsEntry('key1', 'value1')
|
||||
.addTagsEntry('key1', 'value2')
|
||||
|
||||
then:
|
||||
model.tags.size() == 1
|
||||
model.tags['key1'] == 'value2'
|
||||
}
|
||||
|
||||
def 'should support method chaining'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
def capabilities = [PlatformCapability.EC2]
|
||||
def params = ['param1': 'value1']
|
||||
def tags = ['tag1': 'value1']
|
||||
|
||||
when:
|
||||
def result = model
|
||||
.jobDefinitionName('test-job')
|
||||
.type(JobDefinitionType.CONTAINER)
|
||||
.platformCapabilities(capabilities)
|
||||
.containerProperties(containerProps)
|
||||
.parameters(params)
|
||||
.tags(tags)
|
||||
.addTagsEntry('tag2', 'value2')
|
||||
|
||||
then:
|
||||
result == model
|
||||
model.jobDefinitionName == 'test-job'
|
||||
model.type == JobDefinitionType.CONTAINER
|
||||
model.platformCapabilities == capabilities
|
||||
model.containerProperties == containerProps
|
||||
model.parameters == params
|
||||
model.tags.size() == 2
|
||||
model.tags['tag1'] == 'value1'
|
||||
model.tags['tag2'] == 'value2'
|
||||
}
|
||||
|
||||
def 'should handle empty collections'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
|
||||
when:
|
||||
model.platformCapabilities([])
|
||||
.parameters([:])
|
||||
.tags([:])
|
||||
|
||||
then:
|
||||
model.platformCapabilities == []
|
||||
model.parameters == [:]
|
||||
model.tags == [:]
|
||||
}
|
||||
|
||||
def 'should convert to RegisterJobDefinitionRequest with all fields'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
containerProps.image('ubuntu:20.04')
|
||||
def capabilities = [PlatformCapability.EC2, PlatformCapability.FARGATE]
|
||||
def params = ['param1': 'value1', 'param2': 'value2']
|
||||
def tags = ['tag1': 'value1', 'tag2': 'value2']
|
||||
|
||||
when:
|
||||
model.jobDefinitionName('test-job-def')
|
||||
.type(JobDefinitionType.CONTAINER)
|
||||
.platformCapabilities(capabilities)
|
||||
.containerProperties(containerProps)
|
||||
.parameters(params)
|
||||
.tags(tags)
|
||||
|
||||
def request = model.toBatchRequest()
|
||||
|
||||
then:
|
||||
request instanceof RegisterJobDefinitionRequest
|
||||
request.jobDefinitionName() == 'test-job-def'
|
||||
request.type() == JobDefinitionType.CONTAINER
|
||||
request.platformCapabilities() == capabilities
|
||||
request.containerProperties() != null
|
||||
request.containerProperties().image() == 'ubuntu:20.04'
|
||||
request.parameters() == params
|
||||
request.tags() == tags
|
||||
}
|
||||
|
||||
def 'should convert to RegisterJobDefinitionRequest with null fields'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
|
||||
when:
|
||||
def request = model.toBatchRequest()
|
||||
|
||||
then:
|
||||
request instanceof RegisterJobDefinitionRequest
|
||||
!request.jobDefinitionName()
|
||||
!request.type()
|
||||
!request.platformCapabilities()
|
||||
!request.containerProperties()
|
||||
!request.parameters()
|
||||
!request.tags()
|
||||
}
|
||||
|
||||
def 'should convert to RegisterJobDefinitionRequest with minimal fields'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
containerProps.image('nginx')
|
||||
|
||||
when:
|
||||
model.jobDefinitionName('minimal-job')
|
||||
.type(JobDefinitionType.CONTAINER)
|
||||
.containerProperties(containerProps)
|
||||
|
||||
def request = model.toBatchRequest()
|
||||
|
||||
then:
|
||||
request instanceof RegisterJobDefinitionRequest
|
||||
request.jobDefinitionName() == 'minimal-job'
|
||||
request.type() == JobDefinitionType.CONTAINER
|
||||
request.containerProperties() != null
|
||||
request.containerProperties().image() == 'nginx'
|
||||
!request.platformCapabilities()
|
||||
!request.parameters()
|
||||
!request.tags()
|
||||
}
|
||||
|
||||
def 'should convert to RegisterJobDefinitionRequest with empty collections'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
|
||||
when:
|
||||
model.jobDefinitionName('empty-collections-job')
|
||||
.type(JobDefinitionType.CONTAINER)
|
||||
.platformCapabilities([])
|
||||
.containerProperties(containerProps)
|
||||
.parameters([:])
|
||||
.tags([:])
|
||||
|
||||
def request = model.toBatchRequest()
|
||||
|
||||
then:
|
||||
request instanceof RegisterJobDefinitionRequest
|
||||
request.jobDefinitionName() == 'empty-collections-job'
|
||||
request.type() == JobDefinitionType.CONTAINER
|
||||
request.platformCapabilities() == []
|
||||
request.containerProperties() != null
|
||||
request.parameters() == [:]
|
||||
request.tags() == [:]
|
||||
}
|
||||
|
||||
def 'should handle chaining with toBatchRequest'() {
|
||||
given:
|
||||
def model = new RegisterJobDefinitionModel()
|
||||
def containerProps = new ContainerPropertiesModel()
|
||||
containerProps.image('alpine')
|
||||
|
||||
when:
|
||||
def request = model
|
||||
.jobDefinitionName('chained-job')
|
||||
.type(JobDefinitionType.CONTAINER)
|
||||
.containerProperties(containerProps)
|
||||
.addTagsEntry('env', 'test')
|
||||
.addTagsEntry('project', 'nextflow')
|
||||
.toBatchRequest()
|
||||
|
||||
then:
|
||||
request instanceof RegisterJobDefinitionRequest
|
||||
request.jobDefinitionName() == 'chained-job'
|
||||
request.type() == JobDefinitionType.CONTAINER
|
||||
request.containerProperties().image() == 'alpine'
|
||||
request.tags().size() == 2
|
||||
request.tags()['env'] == 'test'
|
||||
request.tags()['project'] == 'nextflow'
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
import nextflow.util.Duration
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsBatchConfigTest extends Specification {
|
||||
|
||||
def 'should create default config' () {
|
||||
when:
|
||||
def batch = new AwsBatchConfig([:])
|
||||
then:
|
||||
batch.maxParallelTransfers == AwsBatchConfig.MAX_TRANSFER
|
||||
batch.maxTransferAttempts == AwsBatchConfig.DEFAULT_AWS_MAX_ATTEMPTS
|
||||
batch.delayBetweenAttempts == AwsBatchConfig.DEFAULT_DELAY_BETWEEN_ATTEMPTS
|
||||
batch.maxSpotAttempts == null
|
||||
batch.retryMode == 'standard'
|
||||
and:
|
||||
!batch.cliPath
|
||||
!batch.volumes
|
||||
!batch.jobRole
|
||||
!batch.executionRole
|
||||
!batch.logsGroup
|
||||
!batch.shareIdentifier
|
||||
!batch.isFargateMode()
|
||||
!batch.s5cmdPath
|
||||
batch.schedulingPriority == 0
|
||||
!batch.terminateUnschedulableJobs
|
||||
!batch.forceGlacierTransfer
|
||||
}
|
||||
|
||||
def 'should create config with options' () {
|
||||
given:
|
||||
def OPTS = [
|
||||
cliPath: '/some/bin/aws',
|
||||
maxParallelTransfers:1,
|
||||
maxTransferAttempts:2,
|
||||
delayBetweenAttempts: '3s',
|
||||
maxSpotAttempts: 4,
|
||||
volumes: '/some/path:/mnt/path,/other/path',
|
||||
jobRole: 'xyz',
|
||||
executionRole: 'some:exec:role',
|
||||
logsGroup: 'group-name-123',
|
||||
retryMode: 'legacy',
|
||||
shareIdentifier: 'id-x1',
|
||||
schedulingPriority: 100,
|
||||
]
|
||||
|
||||
when:
|
||||
def batch = new AwsBatchConfig(OPTS)
|
||||
then:
|
||||
batch.cliPath == '/some/bin/aws'
|
||||
batch.maxParallelTransfers == 1
|
||||
batch.maxTransferAttempts == 2
|
||||
batch.delayBetweenAttempts == Duration.of('3sec')
|
||||
batch.maxSpotAttempts == 4
|
||||
batch.volumes == ['/some/path:/mnt/path', '/other/path']
|
||||
batch.jobRole == 'xyz'
|
||||
batch.executionRole == 'some:exec:role'
|
||||
batch.logsGroup == 'group-name-123'
|
||||
batch.retryMode == 'legacy'
|
||||
batch.shareIdentifier == 'id-x1'
|
||||
batch.schedulingPriority == 100
|
||||
!batch.isFargateMode()
|
||||
}
|
||||
|
||||
def 'should parse volumes list' () {
|
||||
|
||||
given:
|
||||
def executor = new AwsBatchConfig([:])
|
||||
|
||||
expect:
|
||||
executor.makeVols(OBJ) == EXPECTED
|
||||
|
||||
where:
|
||||
OBJ | EXPECTED
|
||||
null | []
|
||||
'foo' | ['foo']
|
||||
'foo, bar' | ['foo','bar']
|
||||
'/foo/,/bar///' | ['/foo','/bar']
|
||||
['/this','/that'] | ['/this','/that']
|
||||
['/foo/bar/'] | ['/foo/bar']
|
||||
|
||||
}
|
||||
|
||||
def 'should add a volume' () {
|
||||
given:
|
||||
def opts = new AwsBatchConfig([:])
|
||||
|
||||
when:
|
||||
opts.addVolume(Paths.get('/some/dir'))
|
||||
then:
|
||||
opts.volumes == ['/some/dir']
|
||||
|
||||
when:
|
||||
opts.addVolume(Paths.get('/other/dir'))
|
||||
opts.addVolume(Paths.get('/other/dir'))
|
||||
then:
|
||||
opts.volumes == ['/some/dir', '/other/dir']
|
||||
}
|
||||
|
||||
def 'should parse cli path' () {
|
||||
given:
|
||||
def opts = new AwsBatchConfig(OPTS)
|
||||
|
||||
expect:
|
||||
opts.cliPath == S3_CLI_PATH
|
||||
opts.s5cmdPath == S5_CLI_PATH
|
||||
opts.isFargateMode() == FARGATE
|
||||
|
||||
where:
|
||||
OPTS | S3_CLI_PATH | S5_CLI_PATH | FARGATE
|
||||
[:] | null | null | false
|
||||
[cliPath: "/opt/bin/aws"] | '/opt/bin/aws' | null | false
|
||||
[cliPath: "/s5cmd"] | null | null | false
|
||||
[cliPath: "/opt/s5cmd --foo"] | null | null | false
|
||||
and:
|
||||
[platformType: 'fargate', cliPath: "/opt/bin/aws"] | null | 's5cmd' | true
|
||||
[platformType: 'fargate', cliPath: "/opt/s5cmd"] | null | '/opt/s5cmd' | true
|
||||
[platformType: 'fargate', cliPath: "/opt/s5cmd --foo"] | null | '/opt/s5cmd --foo'| true
|
||||
}
|
||||
|
||||
def 'should parse unschedulable flag' () {
|
||||
given:
|
||||
def opts = new AwsBatchConfig(OPTS)
|
||||
|
||||
expect:
|
||||
opts.terminateUnschedulableJobs == UNSCHEDULABLE
|
||||
|
||||
where:
|
||||
OPTS | UNSCHEDULABLE
|
||||
[:] | false
|
||||
[terminateUnschedulableJobs: false] | false
|
||||
[terminateUnschedulableJobs: true] | true
|
||||
}
|
||||
|
||||
def 'should parse forceGlacierTransfer flag' () {
|
||||
given:
|
||||
def opts = new AwsBatchConfig(OPTS)
|
||||
|
||||
expect:
|
||||
opts.forceGlacierTransfer == FORCE_GLACIER
|
||||
|
||||
where:
|
||||
OPTS | FORCE_GLACIER
|
||||
[:] | false
|
||||
[forceGlacierTransfer: false] | false
|
||||
[forceGlacierTransfer: true] | true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import software.amazon.awssdk.regions.Region
|
||||
|
||||
import java.nio.file.Files
|
||||
|
||||
import nextflow.SysEnv
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsConfigTest extends Specification {
|
||||
|
||||
|
||||
def 'should get aws region'() {
|
||||
expect:
|
||||
AwsConfig.getAwsRegion([:], [:]) == null
|
||||
and:
|
||||
AwsConfig.getAwsRegion([:], [region:'eu-west-2']) == 'eu-west-2'
|
||||
and:
|
||||
// config has priority
|
||||
AwsConfig.getAwsRegion([AWS_DEFAULT_REGION: 'us-central-1'], [region:'eu-west-2']) == 'eu-west-2'
|
||||
and:
|
||||
AwsConfig.getAwsRegion([AWS_DEFAULT_REGION: 'us-central-1'], [:]) == 'us-central-1'
|
||||
}
|
||||
|
||||
def 'should get aws region from aws file'() {
|
||||
given:
|
||||
def file = Files.createTempFile('test','test')
|
||||
file.text = '''
|
||||
[default]
|
||||
aws_access_key_id = aaa
|
||||
aws_secret_access_key = bbbb
|
||||
region = reg-something
|
||||
|
||||
[foo]
|
||||
aws_access_key_id = xxx
|
||||
aws_secret_access_key = yyy
|
||||
region = reg-foo
|
||||
|
||||
[bar]
|
||||
aws_access_key_id = xxx
|
||||
aws_secret_access_key = yyy
|
||||
aws_session_token = zzz
|
||||
'''
|
||||
|
||||
expect:
|
||||
AwsConfig.getAwsRegion0([AWS_DEFAULT_REGION: 'us-central-1'], [:], file) == 'us-central-1'
|
||||
|
||||
and:
|
||||
AwsConfig.getAwsRegion0([:], [:], file) == 'reg-something'
|
||||
|
||||
and:
|
||||
AwsConfig.getAwsRegion0([:], [profile: 'foo'], file) == 'reg-foo'
|
||||
|
||||
cleanup:
|
||||
file?.delete()
|
||||
}
|
||||
|
||||
def 'should get aws config' () {
|
||||
given:
|
||||
SysEnv.push(ENV)
|
||||
and:
|
||||
def config = new AwsConfig(CONFIG)
|
||||
|
||||
expect:
|
||||
config.accessKey == ACCESS_KEY
|
||||
config.secretKey == SECRET_KEY
|
||||
config.profile == PROFILE
|
||||
config.region == REGION
|
||||
config.credentials == (ACCESS_KEY && SECRET_KEY ? [ACCESS_KEY, SECRET_KEY] : [])
|
||||
|
||||
cleanup:
|
||||
SysEnv.pop()
|
||||
|
||||
where:
|
||||
ENV | CONFIG | ACCESS_KEY | SECRET_KEY | REGION | PROFILE
|
||||
[:] | [accessKey: 'a', secretKey: 'b'] | 'a' | 'b' | null | null
|
||||
[:] | [accessKey: 'x', secretKey: 'y', region: 'eu-region-x'] | 'x' | 'y' | 'eu-region-x' | null
|
||||
[:] | [accessKey: 'p', secretKey: 'q', profile: 'hola'] | 'p' | 'q' | null | 'hola'
|
||||
and:
|
||||
[AWS_DEFAULT_REGION: 'eu-xyz'] | [:] | null | null | 'eu-xyz' | null
|
||||
[AWS_DEFAULT_PROFILE: 'my-profile'] | [:] | null | null | null | 'my-profile'
|
||||
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should add max error retry' () {
|
||||
|
||||
expect:
|
||||
AwsConfig.checkDefaultErrorRetry(SOURCE, ENV) == EXPECTED
|
||||
|
||||
where:
|
||||
SOURCE | ENV | EXPECTED
|
||||
null | null | [max_error_retry: '5']
|
||||
[foo: 1] | [:] | [max_error_retry: '5', foo: 1]
|
||||
[foo: 1] | [AWS_MAX_ATTEMPTS:'3']| [max_error_retry: '3', foo: 1]
|
||||
[max_error_retry: '2', foo: 1] | [:] | [max_error_retry: '2', foo: 1]
|
||||
[:] | [:] | [max_error_retry: '5']
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should resolve S3 region' () {
|
||||
expect:
|
||||
new AwsConfig(CONFIG).resolveS3Region() == REGION
|
||||
|
||||
where:
|
||||
CONFIG | REGION
|
||||
[:] | Region.US_EAST_1.id()
|
||||
[client: [endpoint: "http://custom.endpoint.com"]] | Region.US_EAST_1.id()
|
||||
[region: "eu-south-1", client: [endpoint: "http://custom.endpoint.com"]] | Region.EU_SOUTH_1.id()
|
||||
[region: "eu-south-1", client: [endpoint: "https://s3.eu-west-1.amazonaws.com"]] | Region.EU_WEST_1.id()
|
||||
[region: "eu-south-1", client: [endpoint: "https://bucket.s3-global.amazonaws.com"]] | Region.EU_SOUTH_1.id()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.config
|
||||
|
||||
import software.amazon.awssdk.regions.Region
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import nextflow.SysEnv
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsS3ConfigTest extends Specification {
|
||||
|
||||
def 'should get default config' () {
|
||||
when:
|
||||
def client = new AwsS3Config([:])
|
||||
then:
|
||||
!client.storageClass
|
||||
!client.storageKmsKeyId
|
||||
!client.storageEncryption
|
||||
!client.debug
|
||||
!client.s3Acl
|
||||
!client.pathStyleAccess
|
||||
!client.anonymous
|
||||
!client.isCustomEndpoint()
|
||||
}
|
||||
|
||||
def 'should set config' () {
|
||||
given:
|
||||
def OPTS = [
|
||||
debug:true,
|
||||
storageClass: 'STANDARD',
|
||||
storageKmsKeyId: 'key-1',
|
||||
storageEncryption: 'AES256',
|
||||
s3Acl: 'public-read',
|
||||
s3PathStyleAccess: true,
|
||||
anonymous: true
|
||||
]
|
||||
|
||||
when:
|
||||
def client = new AwsS3Config(OPTS)
|
||||
then:
|
||||
client.debug
|
||||
client.storageClass == 'STANDARD'
|
||||
client.storageKmsKeyId == 'key-1'
|
||||
client.storageEncryption == 'AES256'
|
||||
client.s3Acl == ObjectCannedACL.PUBLIC_READ
|
||||
client.pathStyleAccess
|
||||
client.anonymous
|
||||
}
|
||||
|
||||
def 'should use legacy upload storage class' () {
|
||||
given:
|
||||
def OPTS = [
|
||||
uploadStorageClass: 'STANDARD_IA',
|
||||
]
|
||||
|
||||
when:
|
||||
def client = new AwsS3Config(OPTS)
|
||||
then:
|
||||
client.storageClass == 'STANDARD_IA'
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should get aws s3 endpoint' () {
|
||||
given:
|
||||
SysEnv.push(ENV)
|
||||
|
||||
when:
|
||||
def config = new AwsS3Config(CONFIG)
|
||||
then:
|
||||
config.endpoint == EXPECTED
|
||||
|
||||
cleanup:
|
||||
SysEnv.pop()
|
||||
|
||||
where:
|
||||
ENV | CONFIG | EXPECTED
|
||||
[:] | [:] | null
|
||||
[AWS_S3_ENDPOINT: 'http://foo'] | [:] | 'http://foo'
|
||||
[:] | [endpoint: 'http://bar'] | 'http://bar'
|
||||
[AWS_S3_ENDPOINT: 'http://foo'] | [endpoint: 'http://bar'] | 'http://bar' // <-- config should have priority
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should fail with invalid endpoint protocol' () {
|
||||
when:
|
||||
new AwsS3Config(CONFIG)
|
||||
then:
|
||||
def e = thrown(IllegalArgumentException)
|
||||
e.message == EXPECTED
|
||||
|
||||
where:
|
||||
CONFIG | EXPECTED
|
||||
[endpoint: 'bar.com'] | "S3 endpoint must begin with http:// or https:// prefix - offending value: 'bar.com'"
|
||||
[endpoint: 'ftp://bar.com'] | "S3 endpoint must begin with http:// or https:// prefix - offending value: 'ftp://bar.com'"
|
||||
|
||||
}
|
||||
|
||||
def 'should get s3 legacy properties' () {
|
||||
given:
|
||||
SysEnv.push([:])
|
||||
|
||||
when:
|
||||
def config = new AwsConfig([client:[uploadMaxThreads: 5, uploadChunkSize: 1000, uploadStorageClass: 'STANDARD']])
|
||||
def env = config.getS3LegacyProperties()
|
||||
then:
|
||||
env.upload_storage_class == 'STANDARD'
|
||||
env.upload_chunk_size == '1000'
|
||||
env.upload_max_threads == '5'
|
||||
env.max_error_retry == '5' // <-- default to 5
|
||||
|
||||
when:
|
||||
config = new AwsConfig([client:[uploadMaxThreads: 10, maxErrorRetry: 20, uploadStorageClass: 'ONEZONE_IA']])
|
||||
env = config.getS3LegacyProperties()
|
||||
|
||||
then:
|
||||
env.upload_storage_class == 'ONEZONE_IA'
|
||||
env.upload_max_threads == '10'
|
||||
env.max_error_retry == '20'
|
||||
|
||||
cleanup:
|
||||
SysEnv.pop()
|
||||
|
||||
}
|
||||
|
||||
def 'should get maxDownloadHeapMemory' () {
|
||||
given:
|
||||
SysEnv.push([:])
|
||||
|
||||
when:
|
||||
def config = new AwsConfig([client:[ maxDownloadHeapMemory: '100 MB']])
|
||||
def env = config.getS3Config().getAwsClientConfig()
|
||||
then:
|
||||
env.max_download_heap_memory == Long.toString( 100 * 1024 * 1024)
|
||||
|
||||
cleanup:
|
||||
SysEnv.pop()
|
||||
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should check is custom endpoint' () {
|
||||
given:
|
||||
def config = new AwsS3Config(CONFIG)
|
||||
|
||||
expect:
|
||||
config.isCustomEndpoint() == EXPECTED
|
||||
|
||||
where:
|
||||
EXPECTED | CONFIG
|
||||
false | [:]
|
||||
false | [endpoint: 'https://s3.us-east-2.amazonaws.com']
|
||||
true | [endpoint: 'https://foo.com']
|
||||
// consider AWS China as a custom endpoint
|
||||
// see https://github.com/nextflow-io/nextflow/issues/5836
|
||||
true | [endpoint: 'https://xxxx.s3.cn-north-1.vpce.amazonaws.com.cn']
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should fail with invalid maxDownloadHeapMemory and minimumPartSize are incorrect' () {
|
||||
when:
|
||||
new AwsS3Config(CONFIG)
|
||||
then:
|
||||
def e = thrown(IllegalArgumentException)
|
||||
e.message == EXPECTED
|
||||
|
||||
where:
|
||||
CONFIG | EXPECTED
|
||||
[ maxDownloadHeapMemory: '0MB' ] | "Configuration option `aws.client.maxDownloadHeapMemory` can't be 0"
|
||||
[ minimumPartSize: '0MB' ] | "Configuration option `aws.client.minimumPartSize` can't be 0"
|
||||
[ maxDownloadHeapMemory: '50 MB', minimumPartSize: '6 MB'] | "Configuration option `aws.client.maxDownloadHeapMemory` must be at least 10 times `aws.client.minimumPartSize`"
|
||||
}
|
||||
|
||||
@Unroll
|
||||
def 'should get region from endpoint' () {
|
||||
expect:
|
||||
new AwsS3Config(CONFIG).getEndpointRegion() == REGION
|
||||
|
||||
where:
|
||||
CONFIG | REGION
|
||||
[:] | null
|
||||
[endpoint: "http://custom.endpoint.com"] | null
|
||||
[endpoint: "https://s3.eu-west-1.amazonaws.com"] | Region.EU_WEST_1.id()
|
||||
[endpoint: "https://bucket.s3-global.amazonaws.com"] | null
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.fusion
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.SysEnv
|
||||
import nextflow.fusion.FusionConfig
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsFusionEnvTest extends Specification {

    /**
     * Reset the global config before each feature so that settings
     * assigned by one feature do not leak into the next one.
     */
    def setup() {
        Global.config = Collections.emptyMap()
    }

    /**
     * A scheme other than 's3' is expected to yield an empty environment.
     */
    def 'should return empty env' () {
        given:
        def provider = new AwsFusionEnv()

        when:
        def env = provider.getEnvironment('az', Mock(FusionConfig))

        then:
        env == Collections.emptyMap()
    }

    /**
     * With a default (non-stubbed) Fusion config only the S3 endpoint is exported;
     * the AWS credentials are added once `exportStorageCredentials()` returns true.
     */
    def 'should return env environment' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'x1', AWS_SECRET_ACCESS_KEY: 'y1', AWS_S3_ENDPOINT: 'http://my-host.com'])

        when:
        def config = Mock(FusionConfig)
        def env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_S3_ENDPOINT:'http://my-host.com']

        when:
        config = Mock(FusionConfig) { exportStorageCredentials() >> true }
        env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_ACCESS_KEY_ID: 'x1',
                AWS_SECRET_ACCESS_KEY: 'y1',
                AWS_S3_ENDPOINT:'http://my-host.com']

        cleanup:
        SysEnv.pop()
    }

    /**
     * The `aws.client` storage encryption settings found in the global config
     * are mapped to the corresponding FUSION_AWS_* variables.
     */
    def 'should return env environment with SSE config' () {
        given:
        Global.config = [aws:[client: [storageEncryption:'aws:kms', storageKmsKeyId: 'xyz']]]

        when:
        def config = Mock(FusionConfig)
        def env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [FUSION_AWS_SERVER_SIDE_ENCRYPTION:'aws:kms', FUSION_AWS_SSEKMS_KEY_ID:'xyz']

        cleanup:
        Global.config = null
    }

    /**
     * Same as the plain credentials feature, with the session token exported
     * along with the access and secret keys when credentials export is enabled.
     */
    def 'should return env environment with session token' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'x1', AWS_SECRET_ACCESS_KEY: 'y1', AWS_S3_ENDPOINT: 'http://my-host.com', AWS_SESSION_TOKEN: 'z1'])

        when:
        def config = Mock(FusionConfig)
        def env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_S3_ENDPOINT:'http://my-host.com']

        when:
        config = Mock(FusionConfig) { exportStorageCredentials() >> true }
        env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_ACCESS_KEY_ID: 'x1',
                AWS_SECRET_ACCESS_KEY: 'y1',
                AWS_S3_ENDPOINT:'http://my-host.com',
                AWS_SESSION_TOKEN: 'z1']

        cleanup:
        SysEnv.pop()
    }
}
|
||||
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio
|
||||
|
||||
import software.amazon.awssdk.core.sync.RequestBody
|
||||
import software.amazon.awssdk.services.s3.model.GetObjectRequest
|
||||
import software.amazon.awssdk.services.s3.model.HeadBucketRequest
|
||||
import software.amazon.awssdk.services.s3.model.HeadObjectRequest
|
||||
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.channels.SeekableByteChannel
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
|
||||
import software.amazon.awssdk.services.s3.S3Client
|
||||
import software.amazon.awssdk.services.s3.model.CreateBucketRequest
|
||||
import software.amazon.awssdk.services.s3.model.DeleteBucketRequest
|
||||
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest
|
||||
import software.amazon.awssdk.services.s3.model.S3Exception
|
||||
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request
|
||||
import software.amazon.awssdk.services.s3.model.ListObjectVersionsRequest
|
||||
import software.amazon.awssdk.services.s3.model.S3Object
|
||||
import software.amazon.awssdk.services.s3.model.ObjectVersion
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import org.slf4j.Logger
|
||||
import org.slf4j.LoggerFactory
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
trait AwsS3BaseSpec {
|
||||
|
||||
static final Logger log = LoggerFactory.getLogger(AwsS3BaseSpec)
|
||||
|
||||
abstract S3Client getS3Client()
|
||||
|
||||
/**
 * Parse the given string with {@link S3PathFactory} and return it as an {@link S3Path}.
 *
 * @param path the path string to parse
 * @return the parsed path cast to {@code S3Path}
 */
S3Path s3path(String path) {
    final parsed = S3PathFactory.parse(path)
    return (S3Path) parsed
}
|
||||
|
||||
/**
 * Create a bucket with the given name using the S3 client provided by the implementing spec.
 *
 * @param bucketName the name of the bucket to create
 * @return the same bucket name, for convenience
 */
String createBucket(String bucketName) {
    final request = CreateBucketRequest.builder().bucket(bucketName).build() as CreateBucketRequest
    s3Client.createBucket(request)
    return bucketName
}
|
||||
|
||||
/**
 * Create a bucket named after {@code getRndBucketName()}.
 *
 * @return the name of the newly created bucket
 */
String createBucket() {
    final bucket = getRndBucketName()
    log.debug "Creating s3 bucket '$bucket'"
    return createBucket(bucket)
}
|
||||
|
||||
/**
 * @return a bucket name made of the {@code nf-s3fs-test-} prefix followed by a random UUID
 */
String getRndBucketName() {
    final suffix = UUID.randomUUID().toString()
    return 'nf-s3fs-test-' + suffix
}
|
||||
|
||||
/**
 * String-based convenience overload: converts the path with {@code Paths.get}
 * and delegates to the {@code Path} variant.
 *
 * @param path object location in the form {@code bucket/key}
 * @param content the text to store in the object
 */
def createObject(String path, String content) {
    final Path target = Paths.get(path)
    return createObject(target, content)
}
|
||||
|
||||
/**
 * Split a slash-separated path into its bucket name and object key.
 *
 * @param path any object whose string form is {@code bucket[/key...]}
 * @return a two-element list: the first path component, then the remaining
 *         components joined by '/' (or {@code null} when there is only one component)
 */
private List<String> splitName(path) {
    final parts = path.toString().tokenize('/')
    if( parts.size() == 1 )
        return [ parts[0], null ]
    return [ parts[0], parts[1..-1].join('/') ]
}
|
||||
|
||||
/**
 * Upload the given text as an S3 object.
 *
 * @param path object location; the first component is the bucket, the rest is the key
 * @param content the text whose bytes become the object body
 * @return the result of the {@code putObject} call
 * @throws IllegalArgumentException when the path holds no key after the bucket name
 */
def createObject(Path path, String content) {
    log.debug "Creating s3 blob object '$path'"
    final names = splitName(path)
    final bucketName = names[0]
    final blobName = names[1]
    if( !blobName )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    final request = PutObjectRequest.builder().bucket(bucketName).key(blobName).build() as PutObjectRequest
    return s3Client.putObject(request, RequestBody.fromBytes(content.bytes))
}
|
||||
|
||||
/**
 * Create a "directory" marker: an empty object whose key ends with {@code '/'}.
 *
 * @param path {@code bucket/dir} style path; at least one level below the bucket is required
 * @return the response returned by {@code s3Client.putObject}
 * @throws IllegalArgumentException when {@code path} names only a bucket
 */
def createDirectory(String path) {
    log.debug "Creating blob directory '$path'"
    def (bucketName, blobName) = splitName(path)
    // Without this guard a bucket-only path produced the bogus key "null/".
    if( !blobName )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    final dirKey = blobName + '/'
    return s3Client.putObject(PutObjectRequest.builder().bucket(bucketName).key(dirKey).build() as PutObjectRequest, RequestBody.empty())
}
|
||||
|
||||
/**
 * Delete the key {@code <key>/} derived from {@code path}.
 *
 * NOTE(review): a trailing slash is appended to the key, exactly as
 * {@code createDirectory} does, so this removes directory markers rather than
 * plain objects. That looks like a copy-paste leftover; confirm against the
 * callers before changing it.
 *
 * @param path {@code bucket/key} style path; at least one level below the bucket is required
 * @return the response returned by {@code s3Client.deleteObject}
 * @throws IllegalArgumentException when {@code path} names only a bucket
 */
def deleteObject(String path) {
    log.debug "Deleting blob object '$path'"
    def (bucketName, blobName) = splitName(path)
    // Without this guard a bucket-only path targeted the bogus key "null/".
    if( !blobName )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    final targetKey = blobName + '/'
    return s3Client.deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(targetKey).build() as DeleteObjectRequest)
}
|
||||
|
||||
/**
 * Delete the bucket addressed by {@code path}.
 *
 * The path must name a bucket only; this is enforced with an {@code assert}.
 *
 * @param path path whose single element is the bucket name
 * @return whatever {@code deleteBucket(String)} returns
 */
def deleteBucket(Path path) {
    log.debug "Deleting blob bucket '$path'"
    def (bucketName, blobName) = splitName(path)
    // a key component means the caller did not pass a bucket-level path
    assert blobName == null
    return deleteBucket(bucketName)
}
|
||||
|
||||
/**
 * Empty and delete a bucket.
 *
 * Steps:
 * 1. delete every current object (enough for unversioned buckets);
 * 2. delete every object version and every delete marker (needed for
 *    versioned buckets, where step 1 only inserts delete markers);
 * 3. delete the bucket itself.
 *
 * @param bucketName name of the bucket; a null or empty name is a no-op
 * @return the response of {@code s3Client.deleteBucket}, or {@code null} when nothing was done
 */
def deleteBucket(String bucketName) {
    log.debug "Deleting blob bucket '$bucketName'"
    if( !bucketName )
        return

    // 1. current objects
    final objectPages = s3Client.listObjectsV2Paginator(ListObjectsV2Request.builder().bucket(bucketName).build() as ListObjectsV2Request)
    for( page in objectPages ) {
        for( S3Object obj : page.contents() ) {
            s3Client.deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(obj.key()).build() as DeleteObjectRequest)
        }
    }

    // 2. object versions and delete markers (required for versioned buckets)
    final versionPages = s3Client.listObjectVersionsPaginator(ListObjectVersionsRequest.builder().bucket(bucketName).build() as ListObjectVersionsRequest)
    for( page in versionPages ) {
        for( ObjectVersion vs : page.versions() ) {
            s3Client.deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(vs.key()).versionId(vs.versionId()).build() as DeleteObjectRequest)
        }
        // Delete markers are listed separately from versions; left in place
        // they keep the bucket non-empty and the final delete is rejected.
        for( marker in page.deleteMarkers() ) {
            s3Client.deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(marker.key()).versionId(marker.versionId()).build() as DeleteObjectRequest)
        }
    }

    // 3. the bucket is now empty and can be removed
    s3Client.deleteBucket(DeleteBucketRequest.builder().bucket(bucketName).build() as DeleteBucketRequest)
}
|
||||
|
||||
/**
 * Best-effort variant of {@code deleteBucket(String)}: any failure is logged
 * as a warning and never propagated, so it is safe to call from cleanup code.
 *
 * @param bucketName name of the bucket to delete
 */
def tryDeleteBucket(String bucketName) {
    try {
        deleteBucket(bucketName)
    }
    catch (Throwable t) {
        // deliberately swallowed: cleanup must not fail the calling test
        log.warn ("Unable to delete blob bucket '$bucketName' - Reason: ${t.message ?: t}")
    }
}
|
||||
|
||||
/**
 * String-path convenience overload of {@code existsPath(Path)}.
 *
 * @param path {@code bucket} or {@code bucket/key} style path
 * @return the result of the {@code Path} overload
 */
boolean existsPath(String path) {
    log.debug "Check blob path exists '$path'"
    final Path target = Paths.get(path)
    return existsPath(target)
}
|
||||
|
||||
/**
 * Check whether a bucket or an object exists.
 *
 * A bucket-only path is probed with {@code headBucket}, anything else with
 * {@code headObject}. An {@link S3Exception} with HTTP status 404 means
 * "does not exist"; every other failure is rethrown.
 *
 * @param path path whose first element is the bucket and the rest the key
 * @return {@code true} when the HEAD request succeeds, {@code false} on a 404
 * @throws IllegalArgumentException when no bucket name can be extracted
 */
boolean existsPath(Path path) {
    log.debug "Check blob path exists '$path'"
    def (bucketName, blobName) = splitName(path)
    if( !bucketName )
        throw new IllegalArgumentException("Invalid S3 path $path")

    try {
        if( blobName )
            s3Client.headObject(HeadObjectRequest.builder().bucket(bucketName).key(blobName).build() as HeadObjectRequest)
        else
            s3Client.headBucket(HeadBucketRequest.builder().bucket(bucketName).build() as HeadBucketRequest)
        // Success of the HEAD call is the answer; do not rely on the
        // response object being coerced to a boolean.
        return true
    }
    catch (S3Exception e) {
        if( e.statusCode() == 404 )
            return false
        throw e
    }
}
|
||||
|
||||
/**
 * String-path convenience overload of {@code readObject(Path)}.
 *
 * @param path {@code bucket/key} style path
 * @return the object content as text
 */
String readObject(String path) {
    log.debug "Reading blob object '$path'"
    final Path target = Paths.get(path)
    return readObject(target)
}
|
||||
|
||||
/**
 * Download an object and return its body as text.
 *
 * @param path path whose first element is the bucket and the rest the key
 * @return the text obtained from the {@code getObject} response stream
 */
String readObject(Path path) {
    log.debug "Reading blob object '$path'"
    def (bucketName, blobName) = splitName(path)
    final request = GetObjectRequest.builder().bucket(bucketName).key(blobName).build() as GetObjectRequest
    final body = s3Client.getObject(request)
    return body.getText()
}
|
||||
|
||||
|
||||
/**
 * Produce random text made of newline-terminated UUID strings.
 *
 * Whole lines are appended until the text is at least {@code size} characters
 * long, so the result can be longer than requested.
 *
 * @param size minimum number of characters
 * @return the generated text (empty when {@code size <= 0})
 */
String randomText(int size) {
    final text = new StringBuilder()
    while( text.length() < size )
        text.append(UUID.randomUUID().toString()).append('\n')
    return text.toString()
}
|
||||
|
||||
/**
 * Read a channel up to its end, {@code buffLen} bytes at a time, and return
 * the bytes as a string (decoded by {@code ByteArrayOutputStream.toString()}).
 *
 * @param sbc     channel to read from, starting at its current position
 * @param buffLen capacity of the intermediate buffer
 * @return everything read from the channel
 */
String readChannel(SeekableByteChannel sbc, int buffLen ) {
    final sink = new ByteArrayOutputStream()
    final ByteBuffer chunk = ByteBuffer.allocate(buffLen)
    // stop as soon as a read yields no bytes (end of stream returns -1)
    for( int count = sbc.read(chunk); count > 0; count = sbc.read(chunk) ) {
        chunk.flip()
        sink.write(chunk.array(), 0, chunk.limit())
        chunk.clear()
    }
    return sink.toString()
}
|
||||
|
||||
/**
 * Write {@code content} to a channel in slices of at most {@code buffLen} bytes.
 *
 * @param channel destination channel
 * @param content text to write (encoded with {@code String.getBytes()})
 * @param buffLen maximum number of bytes handed to the channel per slice
 * @throws IllegalArgumentException when there is content to write but
 *         {@code buffLen} is not positive
 */
void writeChannel( SeekableByteChannel channel, String content, int buffLen ) {
    final byte[] bytes = content.getBytes()
    final ByteBuffer buf = ByteBuffer.allocate(buffLen)
    // a zero-sized buffer would never make progress in the loop below
    if( bytes.length > 0 && buffLen < 1 )
        throw new IllegalArgumentException("Buffer length must be greater than zero: $buffLen")

    int i = 0
    while( i < bytes.length ) {
        final int len = Math.min(buffLen, bytes.length - i)
        buf.clear()
        buf.put(bytes, i, len)
        buf.flip()
        // a channel may accept only part of the buffer per call; keep
        // writing until the slice is fully consumed
        while( buf.hasRemaining() )
            channel.write(buf)
        i += len
    }
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio
|
||||
|
||||
import java.nio.file.AccessDeniedException
|
||||
import java.nio.file.NoSuchFileException
|
||||
|
||||
import software.amazon.awssdk.awscore.exception.AwsErrorDetails
|
||||
import software.amazon.awssdk.awscore.exception.AwsServiceException
|
||||
import software.amazon.awssdk.core.exception.SdkClientException
|
||||
import software.amazon.awssdk.core.exception.SdkException
|
||||
import software.amazon.awssdk.services.s3.model.NoSuchBucketException
|
||||
import software.amazon.awssdk.services.s3.model.NoSuchKeyException
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
* Tests for the AWS SDK → NIO exception conversion in {@link S3Client#convertAwsException}.
|
||||
*/
|
||||
class S3ClientTest extends Specification {
|
||||
|
||||
// A missing bucket must surface as an NIO NoSuchFileException naming the bucket URI.
def 'should map NoSuchBucketException to NoSuchFileException'() {
    given:
    def builder = NoSuchBucketException.builder().message('nope')
    def aws = builder.build()

    when:
    def converted = S3Client.convertAwsException(aws, 'listObjects', 'my-bucket', null)

    then:
    converted instanceof NoSuchFileException
    converted.file == 's3://my-bucket'
    // the original SDK error is preserved as the cause
    converted.cause.is(aws)
}
|
||||
|
||||
// A missing key must surface as an NIO NoSuchFileException naming the full object URI.
def 'should map NoSuchKeyException to NoSuchFileException'() {
    given:
    def builder = NoSuchKeyException.builder().message('missing')
    def aws = builder.build()

    when:
    def converted = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'path/to/obj')

    then:
    converted instanceof NoSuchFileException
    converted.file == 's3://my-bucket/path/to/obj'
    // the original SDK error is preserved as the cause
    converted.cause.is(aws)
}
|
||||
|
||||
// Generic service errors carrying a "not found" HTTP status map to NoSuchFileException.
@Unroll
def 'should map HTTP #code to NoSuchFileException'() {
    given:
    def details = AwsErrorDetails.builder().errorCode('X').build()
    def aws = AwsServiceException.builder()
        .message('err')
        .awsErrorDetails(details)
        .statusCode(code)
        .build()

    when:
    def converted = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'key')

    then:
    converted instanceof NoSuchFileException
    converted.file == 's3://my-bucket/key'
    converted.cause.is(aws)

    where:
    code << [404]
}
|
||||
|
||||
// Authentication/authorization HTTP statuses map to the NIO AccessDeniedException.
@Unroll
def 'should map HTTP #code to AccessDeniedException'() {
    given:
    def details = AwsErrorDetails.builder().errorCode('X').build()
    def aws = AwsServiceException.builder()
        .message('denied')
        .awsErrorDetails(details)
        .statusCode(code)
        .build()

    when:
    def converted = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'key')

    then:
    converted instanceof AccessDeniedException
    converted.file == 's3://my-bucket/key'
    converted.cause.is(aws)

    where:
    code << [401, 403]
}
|
||||
|
||||
// Any other service error becomes a plain IOException mentioning the operation and the path.
def 'should map other AwsServiceException to generic IOException'() {
    given:
    def details = AwsErrorDetails.builder().errorCode('X').build()
    def aws = AwsServiceException.builder()
        .message('boom')
        .awsErrorDetails(details)
        .statusCode(500)
        .build()

    when:
    def converted = S3Client.convertAwsException(aws, 'putObject', 'my-bucket', 'k')

    then:
    converted instanceof IOException
    // must not be one of the specialised subclasses
    !(converted instanceof NoSuchFileException)
    !(converted instanceof AccessDeniedException)
    converted.message.contains('putObject')
    converted.message.contains('s3://my-bucket/k')
    converted.cause.is(aws)
}
|
||||
|
||||
// Client-side SDK failures (no HTTP status) become exactly IOException, not a subclass.
def 'should map non-service SdkException to generic IOException'() {
    given:
    def builder = SdkClientException.builder().message('network down')
    SdkException aws = builder.build()

    when:
    def converted = S3Client.convertAwsException(aws, 'listBuckets', null, null)

    then:
    converted.getClass() == IOException
    converted.message.contains('listBuckets')
    converted.message.contains('s3://')
    converted.cause.is(aws)
}
|
||||
|
||||
// A null or empty key must yield the bare bucket URI, without a trailing slash.
def 'should format path without trailing slash when key is null or empty'() {
    given:
    def aws = NoSuchBucketException.builder().message('').build()

    expect:
    def converted = S3Client.convertAwsException(aws, 'op', 'b', key) as NoSuchFileException
    converted.file == 's3://b'

    where:
    key << [null, '']
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import software.amazon.awssdk.services.s3.model.ServerSideEncryption
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class S3FileSystemProviderTest extends Specification {
|
||||
|
||||
// Builds a filesystem from a full `aws` style config map and checks that each
// setting reaches either the client wrapper, its factory, or the filesystem properties.
def 'should create filesystem from config'(){
    given:
    def clientOpts = [
        anonymous: true,
        s3Acl: 'Private',
        connectionTimeout: 20000,
        endpoint: 'https://s3.eu-west-1.amazonaws.com',
        maxConcurrency: 10,
        maxNativeMemory: '500MB',
        minimumPartSize: '7MB',
        multipartThreshold: '32MB',
        maxConnections: 100,
        maxErrorRetry: 3,
        socketTimeout: 20000,
        requesterPays: true,
        s3PathStyleAccess: true,
        proxyHost: 'host.com',
        proxyPort: 80,
        proxyScheme: 'https',
        proxyUsername: 'user',
        proxyPassword: 'pass',
        storageEncryption: 'AES256',
        storageKmsKeyId: 'arn:key:id',
        uploadMaxThreads: 15,
        uploadChunkSize: '7MB',
        uploadMaxAttempts: 4,
        uploadRetrySleep: '200ms'
    ]
    def config = [
        client: clientOpts,
        accessKey: '123456abc',
        secretKey: '78910def',
        profile: 'test'
    ]
    def provider = new S3FileSystemProvider()

    when:
    def fs = provider.newFileSystem(new URI("s3:///bucket/key"), config) as S3FileSystem

    then:
    fs.getBucketName() == 'bucket'
    def client = fs.getClient()
    // settings held by the client wrapper
    client.client != null
    client.cannedAcl == ObjectCannedACL.PRIVATE
    client.storageEncryption == ServerSideEncryption.AES256
    client.isRequesterPaysEnabled == true
    client.kmsKeyId == 'arn:key:id'
    // settings held by the client factory
    def factory = client.factory
    factory.accessKey() == '123456abc'
    factory.secretKey() == '78910def'
    factory.profile() == 'test'
    factory.config.s3Config.anonymous == true
    factory.config.s3Config.endpoint == 'https://s3.eu-west-1.amazonaws.com'
    factory.config.s3Config.pathStyleAccess == true
    // settings exposed as filesystem properties (sizes in bytes, durations in millis)
    fs.properties().getProperty('proxy_host') == 'host.com'
    fs.properties().getProperty('proxy_port') == '80'
    fs.properties().getProperty('proxy_scheme') == 'https'
    fs.properties().getProperty('proxy_username') == 'user'
    fs.properties().getProperty('proxy_password') == 'pass'
    fs.properties().getProperty('socket_timeout') == '20000'
    fs.properties().getProperty('connection_timeout') == '20000'
    fs.properties().getProperty('max_connections') == '100'
    fs.properties().getProperty('max_error_retry') == '3'
    fs.properties().getProperty('upload_max_attempts') == '4'
    fs.properties().getProperty('upload_retry_sleep') == '200'
    fs.properties().getProperty('upload_chunk_size') == '7340032' //7MB
    fs.properties().getProperty('upload_max_threads') == '15'
    fs.properties().getProperty('max_concurrency') == '10'
    fs.properties().getProperty('max_native_memory') == '524288000' //500MB
    fs.properties().getProperty('minimum_part_size') == '7340032' //7MB
    fs.properties().getProperty('multipart_threshold') == '33554432' //32MB
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio
|
||||
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import java.util.concurrent.Callable
|
||||
import java.util.concurrent.ExecutorService
|
||||
import java.util.concurrent.Executors
|
||||
import java.util.concurrent.Future
|
||||
import java.util.concurrent.ThreadFactory
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.TimeoutException
|
||||
import java.util.zip.GZIPInputStream
|
||||
|
||||
import groovy.util.logging.Slf4j
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import nextflow.file.FileHelper
|
||||
import software.amazon.awssdk.core.ResponseInputStream
|
||||
import spock.lang.IgnoreIf
|
||||
import spock.lang.Shared
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
* Regression test for {@link S3FileSystemProvider#newInputStream} close-on-partial-read behavior.
|
||||
*
|
||||
* Before the fix, {@code newInputStream()} returned the raw {@code ResponseInputStream} from the
|
||||
* AWS SDK. Closing it without reading to EOF would trigger Apache HTTP client's
|
||||
* {@code ContentLengthInputStream.close()}, which drains the remaining response body to release
|
||||
* the connection back to the pool. For a multi-GB object this blocked the caller for many
|
||||
* minutes. The fix wraps the stream so {@code close()} calls {@code ResponseInputStream.abort()}
|
||||
* instead.
|
||||
*
|
||||
* A dedicated spec is used because the test requires non-trivial orchestration that would
|
||||
* clutter {@link AwsS3NioTest}:
|
||||
* - the wall-clock bound must be enforced on the caller side — Spock {@code @Timeout} relies
|
||||
* on {@code Thread.interrupt()}, which does not unblock a thread parked in
|
||||
* {@code NioSocketImpl.timedRead()} on a native SSL read;
|
||||
* - when the regression is present the worker thread cannot be stopped by interrupt; the
|
||||
* spec captures the underlying {@link ResponseInputStream} so it can call {@code abort()}
|
||||
* from the test thread on timeout to force-release the HTTP connection.
|
||||
*
|
||||
* The test reads the first line of a public ~1GB FASTQ in the {@code ngi-igenomes} bucket
|
||||
* (eu-west-1, anonymous). Without the fix the run blows the 30s wall-clock bound; with the fix
|
||||
* it completes in seconds.
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
@Slf4j
|
||||
@IgnoreIf({ System.getenv('NXF_SMOKE') })
|
||||
class S3InputStreamAbortTest extends Specification {
|
||||
|
||||
static final String PUBLIC_FASTQ =
|
||||
's3://ngi-igenomes/test-data/sarek/SRR7890919_WES_HCC1395BL-EA_normal_1.fastq.gz'
|
||||
|
||||
static final long TIMEOUT_SECONDS = 30
|
||||
|
||||
@Shared
|
||||
private ExecutorService executor
|
||||
|
||||
// Creates the single worker thread shared by the spec.
def setupSpec() {
    final ThreadFactory daemonThreads = { Runnable r ->
        final worker = new Thread(r, 's3-abort-test-worker')
        // daemon, so a hung worker cannot keep the JVM alive
        worker.setDaemon(true)
        return worker
    } as ThreadFactory
    executor = Executors.newSingleThreadExecutor(daemonThreads)
}
|
||||
|
||||
// Stops the worker executor, if it was ever created.
def cleanupSpec() {
    if( executor != null )
        executor.shutdownNow()
}
|
||||
|
||||
// Registers an anonymous-access S3 filesystem and publishes the same config globally.
def setup() {
    // Anonymous S3 access — ngi-igenomes is public, bucket lives in eu-west-1.
    def awsOpts = [client: [anonymous: true], region: 'eu-west-1']
    def cfg = [aws: awsOpts]
    FileHelper.getOrCreateFileSystemFor(URI.create('s3:///'), cfg.aws)
    Global.config = cfg
    Global.session = Mock(Session) { getConfig() >> cfg }
}
|
||||
|
||||
// Reads one line of a large remote object, closes the stream, and requires the
// whole read-and-close sequence to finish within TIMEOUT_SECONDS.
def 'close on a partially-consumed newInputStream should abort, not drain'() {
    given: 'an S3 path to a large (~1GB) gzipped object'
    final Path path = (Path) S3PathFactory.parse(PUBLIC_FASTQ)

    and: 'open the stream on the test thread'
    final InputStream raw = Files.newInputStream(path)

    when: 'read the first line and close on a background thread, bounded by a wall-clock timeout'
    final Callable<String> readAndClose = {
        // withCloseable calls close() on exit: that is the code path under test
        raw.withCloseable { InputStream is ->
            def gz = new GZIPInputStream(is)
            def reader = new BufferedReader(new InputStreamReader(gz, 'ASCII'))
            return reader.readLine()
        }
    } as Callable<String>
    final Future<String> future = executor.submit(readAndClose)

    String firstLine
    try {
        firstLine = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS)
    }
    catch (TimeoutException e) {
        // Thread.interrupt() cannot unblock the native SSL read — forcibly release the
        // HTTP connection by calling abort() on the stream so the worker thread can
        // exit instead of lingering until the full body has drained.
        // NOTE(review): `raw` is declared as InputStream, so abort() is resolved
        // dynamically; confirm the stream returned by newInputStream exposes it.
        log.warn("Timed out after ${TIMEOUT_SECONDS}s waiting for close() — aborting underlying S3 stream")
        raw.abort()
        throw e
    }
    finally {
        future.cancel(true)
    }

    then: 'no timeout occurred and the first FASTQ record identifier was returned'
    noExceptionThrown()
    firstLine?.startsWith('@')
}
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.nio.util.S3MultipartOptions
|
||||
import nextflow.file.FileHelper
|
||||
import software.amazon.awssdk.services.s3.S3Client
|
||||
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest
|
||||
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse
|
||||
import software.amazon.awssdk.services.s3.model.UploadPartResponse
|
||||
import spock.lang.IgnoreIf
|
||||
import spock.lang.Requires
|
||||
import spock.lang.Specification
|
||||
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.attribute.BasicFileAttributes
|
||||
|
||||
/**
|
||||
* Test for S3OutputStream
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
class S3OutputStreamTest extends Specification implements AwsS3BaseSpec {
|
||||
|
||||
private S3Client s3Client0
|
||||
|
||||
S3Client getS3Client() { s3Client0 }
|
||||
|
||||
// Builds the `aws` config map from the AWS_S3FS_* environment variables
// (values are null when the variables are not set).
static private Map config0() {
    final credentials = [
        accessKey: System.getenv('AWS_S3FS_ACCESS_KEY'),
        secretKey: System.getenv('AWS_S3FS_SECRET_KEY')
    ]
    return [aws: credentials]
}
|
||||
|
||||
// Obtains the underlying SDK client from the S3 filesystem and publishes the
// same configuration through Global.
def setup() {
    def cfg = config0()
    def fs = (S3FileSystem) FileHelper.getOrCreateFileSystemFor(URI.create("s3:///"), cfg.aws)
    s3Client0 = fs.client.getClient()
    Global.config = cfg
    Global.session = Mock(Session) { getConfig() >> cfg }
}
|
||||
|
||||
// Writes ten chunk-sized buffers and expects ten uploaded parts and the full object size.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should ensure multipart is used'() {
    given:
    def bucket = createBucket()
    and:
    def chunksize = 6 * 1024 * 1024
    def bytes = new byte[chunksize]
    new Random().nextBytes(bytes)
    final path = s3path("s3://$bucket/file.txt")
    // chunk and buffer sizes equal the payload size, so each write fills one part
    def multipart = new S3MultipartOptions()
    multipart.setChunkSize(chunksize)
    multipart.setBufferSize(chunksize)

    when:
    def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
    for( int round = 0; round < 10; round++ ) {
        writer.write(bytes)
        writer.flush()
    }
    writer.close()

    then:
    writer.partsCount == 10
    existsPath(path)
    Files.readAttributes(path, BasicFileAttributes).size() == 10 * chunksize

    cleanup:
    if( bucket )
        deleteBucket(bucket)
}
|
||||
|
||||
// Closing a writer that received no data must still create a zero-length object.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should upload empty stream'() {
    given:
    def bucket = createBucket()
    and:
    final path = s3path("s3://$bucket/file.txt")
    def multipart = new S3MultipartOptions()

    when:
    def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
    writer.close()

    then:
    writer.partsCount == 0
    existsPath(path)
    Files.readAttributes(path, BasicFileAttributes).size() == 0

    cleanup:
    if( bucket )
        deleteBucket(bucket)
}
|
||||
// A small payload must be stored without creating any multipart part.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should upload without multipart'() {
    given:
    def bucket = createBucket()
    and:
    def TEXT = randomText(50 * 1024)
    final path = s3path("s3://$bucket/file.txt")
    def multipart = new S3MultipartOptions()

    when:
    def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
    def payload = TEXT.bytes
    writer.write(payload)
    writer.close()

    then:
    writer.partsCount == 0
    existsPath(path)
    readObject(path) == TEXT

    cleanup:
    if( bucket )
        deleteBucket(bucket)
}
|
||||
|
||||
// Uploads parts out of order against a mocked client and checks that the
// completion request lists them in ascending part number.
def 'should send sorted parts to completeMultipartUpload'() {
    given:
    final path = s3path("s3://test/file.txt")
    def multipart = new S3MultipartOptions()
    def client = Mock(S3Client)
    def capturedParts = null
    and:
    def writer = new S3OutputStream(client, path.toS3ObjectId(), multipart)

    when: 'simulate unsorted uploads'
    writer.init()
    writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 2, true)
    writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 0, false)
    writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 1, false)
    writer.completeMultipartUpload()

    then:
    1 * client.createMultipartUpload(_) >> CreateMultipartUploadResponse.builder().uploadId("upload-id").build()
    3 * client.uploadPart(_,_) >> { UploadPartResponse.builder().eTag('etag').build()}
    // capture the parts list handed to the completion call
    1 * client.completeMultipartUpload(_ as CompleteMultipartUploadRequest) >> { CompleteMultipartUploadRequest req ->
        capturedParts = req.multipartUpload().parts()
        return null
    }
    capturedParts[0].partNumber() == 0
    capturedParts[1].partNumber() == 1
    capturedParts[2].partNumber() == 2
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng
|
||||
|
||||
import nextflow.util.Duration
|
||||
import nextflow.util.MemoryUnit
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class DownloadOptsTest extends Specification {
|
||||
|
||||
// With no properties set, DownloadOpts must report its built-in defaults.
def 'should get default options' () {
    given:
    def empty = new Properties()

    when:
    def opts = DownloadOpts.from(empty)

    then:
    !opts.parallelEnabled()
    opts.numWorkers() == 10
    opts.queueMaxSize() == 10_000
    opts.bufferMaxSize() == MemoryUnit.of('1 GB')
    opts.chunkSize() == 10 * 1024 * 1024
    opts.maxAttempts() == 5
    opts.maxDelayMillis() == Duration.of('90s').toMillis()
}
|
||||
|
||||
// Every download_* property must override the corresponding default.
def 'should set options with properties' () {
    given:
    def CONFIG = '''
download_parallel = false
download_queue_max_size = 11
download_buffer_max_size = 222MB
download_num_workers = 33
download_chunk_size = 44
download_max_attempts = 99
download_max_delay = 99s
'''
    def props = new Properties()
    def source = new StringReader(CONFIG)
    props.load(source)

    when:
    def opts = DownloadOpts.from(props)

    then:
    !opts.parallelEnabled()
    opts.numWorkers() == 33
    opts.queueMaxSize() == 11
    opts.bufferMaxSize() == MemoryUnit.of('222 MB')
    opts.chunkSize() == 44
    opts.maxAttempts() == 99
    opts.maxDelayMillis() == Duration.of('99s').toMillis()
}
|
||||
|
||||
|
||||
// NXF_S3_DOWNLOAD_* environment entries must override the defaults when no property is set.
def 'should set options with env' () {
    given:
    def ENV = [
        NXF_S3_DOWNLOAD_PARALLEL: 'false',
        NXF_S3_DOWNLOAD_QUEUE_SIZE: '11',
        NXF_S3_DOWNLOAD_NUM_WORKERS: '22',
        NXF_S3_DOWNLOAD_CHUNK_SIZE: '33',
        NXF_S3_DOWNLOAD_BUFFER_MAX_MEM: '44 G',
        NXF_S3_DOWNLOAD_MAX_ATTEMPTS: '88',
        NXF_S3_DOWNLOAD_MAX_DELAY: '88s'
    ]
    def noProps = new Properties()

    when:
    def opts = DownloadOpts.from(noProps, ENV)

    then:
    !opts.parallelEnabled()
    opts.queueMaxSize() == 11
    opts.numWorkers() == 22
    opts.chunkSize() == 33
    opts.bufferMaxSize() == MemoryUnit.of('44 GB')
    opts.maxAttempts() == 88
    opts.maxDelayMillis() == Duration.of('88s').toMillis()
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.ng
|
||||
|
||||
|
||||
import java.util.concurrent.Executors
|
||||
import java.util.function.Function
|
||||
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class FutureInputStreamTest extends Specification {
|
||||
|
||||
// Streams ten chunks produced by a task through FutureInputStream and checks
// the concatenated text and the buffer pool size afterwards.
def 'should read the stream ad give back the chunks' () {
    given:
    def STR = "hello world!"
    def BYTES = STR.bytes
    def CHUNK_SIZE = BYTES.length +2
    def TIMES = 10
    def CAPACITY = 1
    def buffers = new ChunkBufferFactory(CHUNK_SIZE, CAPACITY)
    and:
    def executor = Executors.newFixedThreadPool(10)
    and:
    // one entry per chunk to produce: 0, 1, ... TIMES-1
    def parts = []
    for( int idx = 0; idx < TIMES; idx++ )
        parts.add(idx)
    Function<Integer,ChunkBuffer> task = {
        def chunk = buffers.create()
        chunk.fill( new ByteArrayInputStream(BYTES) )
        chunk.makeReadable()
        return chunk
    }

    when:
    def itr = new FutureIterator(parts, task, executor, CAPACITY)
    def stream = new FutureInputStream(itr)

    then:
    stream.text == STR * TIMES
    and:
    buffers.getPoolSize() == CAPACITY

    cleanup:
    executor.shutdownNow()
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.nio.util
|
||||
|
||||
import software.amazon.awssdk.transfer.s3.S3TransferManager
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
|
||||
/**
|
||||
* Test for ExtendedS3TransferManager
|
||||
*
|
||||
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
|
||||
*/
|
||||
class ExtendedS3TransferManagerTest extends Specification {

    /** Mocked delegate handed to every manager under test; no interaction is verified on it. */
    def mockTransferManager = Mock(S3TransferManager)

    /**
     * Creates the manager under test from the given string settings.
     *
     * @param settings property names mapped to their string values; empty to use the defaults
     * @return a new {@code ExtendedS3TransferManager} wrapping the mocked transfer manager
     */
    private def newManager(Map<String,String> settings = [:]) {
        def props = new Properties()
        settings.each { name, value -> props.setProperty(name, value) }
        return new ExtendedS3TransferManager(mockTransferManager, props)
    }

    /** With no properties set, the part size is 8 MiB and 50 download permits are available. */
    def 'should initialize with default values'() {
        when:
        def extendedManager = newManager()

        then:
        extendedManager.partSize == 8 * 1024 * 1024  // 8 MB
        extendedManager.downloadPermits == 50        // 400MB / 8MB
    }

    /** Both the heap budget and the part size can be overridden through properties. */
    def 'should initialize with custom properties'() {
        when:
        def extendedManager = newManager(
                max_download_heap_memory: '200000000',   // 200,000,000 bytes
                minimum_part_size       : '16777216')    // 16 MiB

        then:
        extendedManager.partSize == 16 * 1024 * 1024  // 16 MB
        extendedManager.downloadPermits == 11         // floor(200000000 / 16777216) = floor(11.92) = 11
    }

    /** The estimate grows with the file size and never exceeds 10 in any of the rows below. */
    @Unroll
    def 'should estimate parts correctly'() {
        given:
        def extendedManager = newManager(minimum_part_size: partSizeStr)

        expect:
        extendedManager.estimateParts(fileSize) == expectedParts

        where:
        fileSize  | partSizeStr | expectedParts
        1024      | '8388608'   | 1    // 1KB file, 8MB parts = 1 part
        8388608   | '8388608'   | 1    // 8MB file, 8MB parts = 1 part
        16777216  | '8388608'   | 2    // 16MB file, 8MB parts = 2 parts
        100000000 | '8388608'   | 10   // ~95MB file, 8MB parts = more than 10 parts, capped at DEFAULT_INIT_BUFFER_PARTS
        500000000 | '8388608'   | 10   // ~476MB file, 8MB parts = more than 10 parts, capped at DEFAULT_INIT_BUFFER_PARTS
        1048576   | '1048576'   | 1    // 1MB file, 1MB parts = 1 part
        10485760  | '1048576'   | 10   // 10MB file, 1MB parts = exactly 10 parts (the cap is reached, not exceeded)
    }

    /** Permits are the heap budget divided by the part size, rounded down. */
    @Unroll
    def 'should calculate downloadPermits correctly'() {
        when:
        def extendedManager = newManager(
                max_download_heap_memory: maxBuffer,
                minimum_part_size       : partSize)

        then:
        extendedManager.downloadPermits == expectedMaxParts

        where:
        maxBuffer   | partSize   | expectedMaxParts
        '419430400' | '8388608'  | 50   // 400MB / 8MB
        '104857600' | '8388608'  | 12   // 100MB / 8MB = 12.5, rounded down
        '838860800' | '8388608'  | 100  // 800MB / 8MB
        '419430400' | '16777216' | 25   // 400MB / 16MB
    }

    /** Zero and negative sizes still yield one part. */
    def 'should handle zero or negative file sizes in estimateParts'() {
        given:
        def extendedManager = newManager()

        expect:
        extendedManager.estimateParts(0) == 1
        extendedManager.estimateParts(-100) == 1
    }

    /** The largest possible size must not overflow the estimate; it stays at the cap. */
    def 'should handle large file sizes in estimateParts'() {
        given:
        def extendedManager = newManager()

        when:
        def parts = extendedManager.estimateParts(Long.MAX_VALUE)

        then:
        parts == 10 // Should be capped at DEFAULT_INIT_BUFFER_PARTS
    }
}
|
||||
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.aws.nio.util
|
||||
|
||||
import nextflow.cloud.aws.config.AwsConfig
|
||||
import software.amazon.awssdk.http.SdkHttpConfigurationOption
|
||||
import spock.lang.Specification
|
||||
|
||||
class S3ClientConfigurationTest extends Specification{

    /**
     * Turns the given {@code client} options into the legacy S3 properties
     * object that the client configuration factories take as input.
     */
    private static Properties legacyProperties(Map clientOptions) {
        def awsConfig = new AwsConfig([client: clientOptions])
        def result = new Properties()
        result.putAll(awsConfig.getS3LegacyProperties())
        return result
    }

    /**
     * Sync client: retry, proxy, timeout and connection-pool options must all be
     * visible on the override configuration and on the HTTP client builder.
     */
    def 'create S3 synchronous client configuration' (){
        given:
        def props = legacyProperties([
                connectionTimeout: 20000, maxConnections: 100, maxErrorRetry: 3, socketTimeout: 20000,
                proxyHost: 'host.com', proxyPort: 80, proxyScheme: 'https', proxyUsername: 'user', proxyPassword: 'pass'])

        when:
        def clientConfig = S3SyncClientConfiguration.create(props)

        then:
        // maxErrorRetry 3 is reported by the retry strategy as 4 attempts
        def retryStrategy = clientConfig.getClientOverrideConfiguration().retryStrategy().get()
        retryStrategy.maxAttempts() == 4

        and:
        def httpBuilder = clientConfig.getHttpClientBuilder()
        def proxy = httpBuilder.proxyConfiguration
        proxy.host() == 'host.com'
        proxy.port() == 80
        proxy.scheme() == 'https'
        proxy.username() == 'user'
        proxy.password() == 'pass'

        and:
        def httpOptions = httpBuilder.standardOptions
        httpOptions.get(SdkHttpConfigurationOption.CONNECTION_TIMEOUT).toMillis() == 20000
        httpOptions.get(SdkHttpConfigurationOption.READ_TIMEOUT).toMillis() == 20000 // socket timeout
        httpOptions.get(SdkHttpConfigurationOption.MAX_CONNECTIONS) == 100
    }

    /**
     * Async client with an empty {@code client} section: the health configuration
     * must report a minimum throughput of 1 Bps and a 30000 ms timeout.
     */
    def 'create S3 asynchronous client configuration with default socket timeout' (){
        given:
        def props = legacyProperties([:])

        when:
        def clientConfig = S3AsyncClientConfiguration.create(props)

        then:
        def health = clientConfig.getCrtHttpConfiguration().healthConfiguration()
        health.minimumThroughputInBps() == 1
        health.minimumThroughputTimeout().toMillis() == 30000
    }

    /**
     * Async client: CRT performance, multipart, proxy, timeout and retry options
     * must all be carried over from the {@code client} section.
     */
    def 'create S3 asynchronous client configuration' (){
        given:
        def props = legacyProperties([
                maxConcurrency: 10, maxNativeMemory: '500MB', minimumPartSize: '7MB', multipartThreshold: '32MB',
                targetThroughputInGbps: 15, connectionTimeout: 20000, maxConnections: 100, maxErrorRetry: 3, socketTimeout: 20000,
                proxyHost: 'host.com', proxyPort: 80, proxyScheme: 'https', proxyUsername: 'user', proxyPassword: 'pass'])

        when:
        def clientConfig = S3AsyncClientConfiguration.create(props)

        then:
        def retryStrategy = clientConfig.getClientOverrideConfiguration().retryStrategy().get()
        retryStrategy.maxAttempts() == 4

        and:
        // CRT performance settings
        clientConfig.getMaxConcurrency() == 10
        clientConfig.getMaxNativeMemoryInBytes() == 524288000L
        clientConfig.getTargetThroughputInGbps() == 15

        and:
        // multipart settings: 32MB threshold, 7MB minimum part size
        def multipart = clientConfig.getMultipartConfiguration()
        multipart.thresholdInBytes() == 33554432
        multipart.minimumPartSizeInBytes() == 7340032

        and:
        // CRT HTTP configuration: proxy
        def crtHttp = clientConfig.getCrtHttpConfiguration()
        def proxy = crtHttp.proxyConfiguration()
        proxy.host() == 'host.com'
        proxy.port() == 80
        proxy.scheme() == 'https'
        proxy.username() == 'user'
        proxy.password() == 'pass'

        and:
        // CRT HTTP configuration: socketTimeout surfaces as the minimum-throughput timeout
        def health = crtHttp.healthConfiguration()
        health.minimumThroughputInBps() == 1
        health.minimumThroughputTimeout().toMillis() == 20000

        and:
        // CRT retry configuration
        clientConfig.getCrtRetryConfiguration().numRetries() == 3
    }

}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsHelperTest extends Specification {

    /**
     * {@code AwsHelper.parseS3Acl} must map both the camel-case and the lower-case
     * spelling of a canned ACL to the same constant, and must reject an unknown
     * name with an {@code IllegalArgumentException}.
     */
    def 'should parse S3 acl' () {
        given:
        def publicRead = ObjectCannedACL.PUBLIC_READ
        def privateAcl = ObjectCannedACL.PRIVATE

        expect:
        publicRead == AwsHelper.parseS3Acl('PublicRead')
        publicRead == AwsHelper.parseS3Acl('public-read')
        privateAcl == AwsHelper.parseS3Acl('Private')
        privateAcl == AwsHelper.parseS3Acl('private')

        when:
        AwsHelper.parseS3Acl('unknown')

        then:
        thrown(IllegalArgumentException)
    }
}
|
||||
@@ -0,0 +1,875 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.batch.AwsOptions
|
||||
import nextflow.util.Duration
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class S3BashLibTest extends Specification {
|
||||
|
||||
// -- legacy
|
||||
|
||||
/**
 * Checks the script text returned by {@code S3BashLib.script(opts)} for a mocked
 * {@code AwsOptions} whose CLI is 'aws' and whose storage class and storage
 * encryption are both null. The expected text defines nxf_cp_retry, nxf_parallel,
 * nxf_s3_upload and nxf_s3_download, with uploads passing --storage-class STANDARD.
 * Inside the single-quoted expected string a backslash-dollar is an escape for a plain dollar sign.
 * NOTE(review): max_attempts=1, timeout=10 and the parallel cap of 4 are not stubbed
 * here - presumably fallbacks applied by S3BashLib; confirm against that class.
 */
def 'should get uploader script' () {
|
||||
|
||||
given:
|
||||
def opts = Mock(AwsOptions)
|
||||
|
||||
when:
|
||||
def script = S3BashLib.script(opts)
|
||||
then:
|
||||
// cardinality 1: each of these getters must be called exactly once
1 * opts.getAwsCli() >> 'aws'
|
||||
1 * opts.getStorageClass() >> null
|
||||
1 * opts.getStorageEncryption() >> null
|
||||
|
||||
script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''
|
||||
.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the script text returned by {@code S3BashLib.script(opts)} when the mocked
 * {@code AwsOptions} reports storage class 'S-CLAZZ', storage encryption 'S-ENCRYPT',
 * CLI path '/foo/bin/aws' and 33 max parallel transfers. In the expected text every
 * CLI call uses /foo/bin/aws, uploads carry "--sse S-ENCRYPT --storage-class S-CLAZZ",
 * and nxf_parallel caps its concurrency at 33.
 */
def 'should set storage class and encryption' () {
|
||||
|
||||
given:
|
||||
def opts = Mock(AwsOptions)
|
||||
|
||||
when:
|
||||
def script = S3BashLib.script(opts)
|
||||
then:
|
||||
// plain stubs (no cardinality): only the returned values matter here
opts.getStorageClass() >> 'S-CLAZZ'
|
||||
opts.getStorageEncryption() >> 'S-ENCRYPT'
|
||||
opts.getAwsCli() >> '/foo/bin/aws'
|
||||
opts.getMaxParallelTransfers() >> 33
|
||||
|
||||
script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>33 )); then echo 33; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
/foo/bin/aws s3 cp --only-show-errors --sse S-ENCRYPT --storage-class S-CLAZZ - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
/foo/bin/aws s3 cp --only-show-errors --recursive --sse S-ENCRYPT --storage-class S-CLAZZ "$name" "$s3path/$name"
|
||||
else
|
||||
/foo/bin/aws s3 cp --only-show-errors --sse S-ENCRYPT --storage-class S-CLAZZ "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(/foo/bin/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
/foo/bin/aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
/foo/bin/aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''
|
||||
.stripIndent(true)
|
||||
|
||||
}
|
||||
|
||||
// -- new test
|
||||
|
||||
/**
 * Checks the text returned by the no-argument {@code S3BashLib.script()} when the
 * global session exposes an empty config map. The expected text exports
 * AWS_RETRY_MODE=standard and AWS_MAX_ATTEMPTS=5, then defines nxf_s3_upload and
 * nxf_s3_download using the 'aws' command and --storage-class STANDARD.
 */
def 'should create base script' () {
|
||||
given:
|
||||
// NOTE(review): Global.session is assigned here and is not reset within this method
Global.session = Mock(Session) {
|
||||
getConfig() >> [:]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=standard
|
||||
export AWS_MAX_ATTEMPTS=5
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the text returned by the no-argument {@code S3BashLib.script()} when the
 * session config sets aws.batch.retryMode to 'legacy' and aws.batch.maxTransferAttempts
 * to 100. The expected text exports AWS_RETRY_MODE=legacy and AWS_MAX_ATTEMPTS=100
 * ahead of the upload/download helpers.
 */
def 'should create base script with legacy retry mode' () {
|
||||
given:
|
||||
// NOTE(review): Global.session is assigned here and is not reset within this method
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch: [maxTransferAttempts: 100, retryMode: 'legacy']]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=legacy
|
||||
export AWS_MAX_ATTEMPTS=100
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the text returned by the no-argument {@code S3BashLib.script()} when the
 * session config sets aws.batch.retryMode to 'built-in'. Unlike the other retry-mode
 * features in this spec, the expected text contains no "# aws cli retry config"
 * section: it starts directly with the upload/download helpers.
 */
def 'should create base script with built-in retry mode' () {
|
||||
given:
|
||||
// NOTE(review): Global.session is assigned here and is not reset within this method
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch: [retryMode: 'built-in']]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the text returned by the no-argument {@code S3BashLib.script()} when the
 * session config sets aws.batch.cliPath to '/some/bin/aws', retryMode to 'legacy'
 * and maxTransferAttempts to 99. The expected text exports AWS_RETRY_MODE=legacy and
 * AWS_MAX_ATTEMPTS=99, and every CLI call in the helpers uses /some/bin/aws.
 */
def 'should create base script with custom settings' () {
|
||||
given:
|
||||
// NOTE(review): Global.session is assigned here and is not reset within this method
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch: [cliPath: '/some/bin/aws', retryMode: 'legacy', maxTransferAttempts: 99]]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=legacy
|
||||
export AWS_MAX_ATTEMPTS=99
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
/some/bin/aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
/some/bin/aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
/some/bin/aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(/some/bin/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
/some/bin/aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
/some/bin/aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the text returned by {@code S3BashLib.script(opts)} when the mocked
 * {@code AwsOptions} reports 5 max parallel transfers, 10 max transfer attempts and
 * a 20s delay between attempts. In the expected text nxf_cp_retry uses
 * max_attempts=10 and timeout=20, and nxf_parallel caps its concurrency at 5.
 * NOTE(review): the 'aws' command and STANDARD storage class in the expected text
 * are not stubbed here - presumably fallbacks applied by S3BashLib; confirm.
 */
def 'should create base script with custom options' () {
|
||||
given:
|
||||
def opts = Mock(AwsOptions) {
|
||||
getMaxParallelTransfers() >> 5
|
||||
getMaxTransferAttempts() >> 10
|
||||
getDelayBetweenAttempts() >> Duration.of('20s')
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script(opts) == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=10
|
||||
local timeout=20
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( $attempt < $max_attempts ))
|
||||
do
|
||||
if "$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=$?
|
||||
fi
|
||||
if [[ $exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep $timeout
|
||||
attempt=$(( attempt + 1 ))
|
||||
timeout=$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>5 )); then echo 5; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Checks the text returned by {@code S3BashLib.script(opts)} for a mocked
 * {@code AwsOptions} with no stubbed getters at all. The expected text uses
 * max_attempts=1, timeout=10, a parallel cap of 4, the 'aws' command and
 * --storage-class STANDARD.
 * NOTE(review): those values are presumably the fallbacks S3BashLib applies when
 * the options return null/zero - confirm against that class.
 */
def 'should create base script with options' () {
|
||||
given:
|
||||
def opts = Mock(AwsOptions)
|
||||
|
||||
expect:
|
||||
S3BashLib.script(opts) == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( $attempt < $max_attempts ))
|
||||
do
|
||||
if "$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=$?
|
||||
fi
|
||||
if [[ $exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep $timeout
|
||||
attempt=$(( attempt + 1 ))
|
||||
timeout=$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
/**
 * Checks the text returned by {@code S3BashLib.script(opts)} for a real
 * {@code AwsOptions} built from a mocked session whose config sets
 * aws.client.storageEncryption to 'aws:kms' and aws.client.storageKmsKeyId to
 * 'my-kms-key'. In the expected text every upload command carries
 * "--sse aws:kms --sse-kms-key-id my-kms-key"; the text also contains the
 * retry-config exports (standard / 5) and nxf_cp_retry with max_attempts=5.
 */
def 'should create with storage encrypt' () {
|
||||
given:
|
||||
def sess1 = Mock(Session) {
|
||||
getConfig() >> [aws: [ client: [ storageKmsKeyId: 'my-kms-key', storageEncryption: 'aws:kms']]]
|
||||
}
|
||||
and:
|
||||
// real options object (not a mock), populated from the mocked session config
def opts = new AwsOptions(sess1)
|
||||
|
||||
expect:
|
||||
S3BashLib.script(opts) == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=5
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( $attempt < $max_attempts ))
|
||||
do
|
||||
if "$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=$?
|
||||
fi
|
||||
if [[ $exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep $timeout
|
||||
attempt=$(( attempt + 1 ))
|
||||
timeout=$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=standard
|
||||
export AWS_MAX_ATTEMPTS=5
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Checks the text returned by {@code S3BashLib.script(opts)} for a real
 * {@code AwsOptions} built from a mocked session whose config sets
 * aws.client.s3Acl to 'PublicRead'. In the expected text every upload command
 * carries "--acl public-read"; the text also contains the retry-config exports
 * (standard / 5) and nxf_cp_retry with max_attempts=5.
 */
def 'should create with s3 acl' () {
|
||||
given:
|
||||
def sess1 = Mock(Session) {
|
||||
getConfig() >> [aws: [ client: [ s3Acl: 'PublicRead']]]
|
||||
}
|
||||
and:
|
||||
// real options object (not a mock), populated from the mocked session config
def opts = new AwsOptions(sess1)
|
||||
|
||||
expect:
|
||||
S3BashLib.script(opts) == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=5
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( $attempt < $max_attempts ))
|
||||
do
|
||||
if "$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=$?
|
||||
fi
|
||||
if [[ $exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep $timeout
|
||||
attempt=$(( attempt + 1 ))
|
||||
timeout=$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=standard
|
||||
export AWS_MAX_ATTEMPTS=5
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --acl public-read --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Installs a mocked global Session whose config sets aws.batch.platformType to 'fargate'
// and aws.batch.cliPath to 's5cmd', then expects S3BashLib.script() to equal the literal
// script below, whose upload and download helpers invoke `s5cmd`.
def 'should create s5cmd script' () {
|
||||
given:
|
||||
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd']]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws helper for s5cmd
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
local tmp=$(nxf_mktemp)
|
||||
cp /dev/stdin $tmp/$name
|
||||
s5cmd cp --storage-class STANDARD $tmp/$name "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
s5cmd cp --storage-class STANDARD "$name/" "$s3path/$name/"
|
||||
else
|
||||
s5cmd cp --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(s5cmd ls $source | grep -F "DIR ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
s5cmd cp "$source/*" "$target"
|
||||
else
|
||||
s5cmd cp "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Same s5cmd configuration as the plain s5cmd feature, plus aws.client.s3Acl set to
// 'PublicRead'; the expected script below passes `--acl public-read` on every
// `s5cmd cp` upload command, while the download helper is unchanged.
def 'should create s5cmd script with acl' () {
|
||||
given:
|
||||
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd'], client:[ s3Acl: 'PublicRead']]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws helper for s5cmd
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
local tmp=$(nxf_mktemp)
|
||||
cp /dev/stdin $tmp/$name
|
||||
s5cmd cp --acl public-read --storage-class STANDARD $tmp/$name "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
s5cmd cp --acl public-read --storage-class STANDARD "$name/" "$s3path/$name/"
|
||||
else
|
||||
s5cmd cp --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(s5cmd ls $source | grep -F "DIR ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
s5cmd cp "$source/*" "$target"
|
||||
else
|
||||
s5cmd cp "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Installs a mocked global Session whose config sets aws.batch.forceGlacierTransfer to true
// and expects S3BashLib.script() to equal the literal script below, in which the
// recursive `aws s3 cp` download command carries `--force-glacier-transfer`.
def 'should create script with force glacier transfer' () {
|
||||
given:
|
||||
Global.session = Mock(Session) {
|
||||
getConfig() >> [aws:[batch:[forceGlacierTransfer: true]]]
|
||||
}
|
||||
|
||||
expect:
|
||||
S3BashLib.script() == '''
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=standard
|
||||
export AWS_MAX_ATTEMPTS=5
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --force-glacier-transfer "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class S3PathFactoryTest extends Specification {

    /**
     * Parses each S3 URI, checks the bucket and key of the resulting path,
     * then verifies that rendering the path back to a URI string yields the input.
     */
    def 'should parse s3 paths' () {

        when:
        def parsed = S3PathFactory.parse(S3_PATH)
        then:
        parsed instanceof S3Path
        and:
        def s3 = parsed as S3Path
        s3.getBucket() == BUCKET
        s3.getKey() == KEY

        when:
        def uriString = S3PathFactory.getUriString(parsed)
        then:
        uriString == S3_PATH

        where:
        S3_PATH                                              || BUCKET     | KEY
        's3://cbcrg-eu/raw/x_r1.fq'                          || 'cbcrg-eu' | 'raw/x_r1.fq'
        's3://cbcrg-eu/raw/**_R1*{fastq,fq,fastq.gz,fq.gz}'  || 'cbcrg-eu' | 'raw/**_R1*{fastq,fq,fastq.gz,fq.gz}'
    }

    /**
     * A doubled slash inside the key is collapsed when the path is rendered as a URI string.
     */
    def 'should ignore double slashes' () {
        given:
        def parsed = S3PathFactory.parse('s3://cbcrg-eu/raw//x_r1.fq')

        when:
        def uriString = S3PathFactory.getUriString(parsed)
        then:
        uriString == 's3://cbcrg-eu/raw/x_r1.fq'
    }
}
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.cloud.aws.util
|
||||
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import nextflow.file.FileHelper
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class S3PathTest extends Specification {

    /**
     * toUriString() renders the path with its s3:// scheme; a bare bucket gains a trailing slash.
     */
    @Unroll
    def 'should convert to uri string' () {
        given:
        def s3Path = FileHelper.asPath(PATH)

        expect:
        s3Path.toUriString() == STR

        where:
        PATH              || STR
        's3://foo'        || 's3://foo/'
        's3://foo/bar'    || 's3://foo/bar'
        's3://foo/b a r'  || 's3://foo/b a r'
        's3://f o o/bar'  || 's3://f o o/bar'
        's3://f_o_o/bar'  || 's3://f_o_o/bar'
    }

    /**
     * toString() drops the scheme and renders the bucket as the first path segment.
     */
    @Unroll
    def 'should convert to string' () {
        given:
        def s3Path = FileHelper.asPath(PATH)

        expect:
        s3Path.toString() == STR

        where:
        PATH              || STR
        's3://foo'        || '/foo/'
        's3://foo/bar'    || '/foo/bar'
        's3://foo/b a r'  || '/foo/b a r'
        's3://f o o/bar'  || '/f o o/bar'
        's3://f_o_o/bar'  || '/f_o_o/bar'
    }

    /**
     * Paths built from the same URI are equal and share a hash code;
     * a different key or a different bucket breaks both.
     */
    def 'should check equals and hashcode' () {
        given:
        def first = FileHelper.asPath('s3://foo/some/foo.txt')
        def same = FileHelper.asPath('s3://foo/some/foo.txt')
        def otherKey = FileHelper.asPath('s3://foo/some/bar.txt')
        def otherBucket = FileHelper.asPath('s3://bar/some/foo.txt')

        expect:
        first == same
        first != otherKey
        otherKey != otherBucket
        and:
        first.hashCode() == same.hashCode()
        first.hashCode() != otherKey.hashCode()
        otherKey.hashCode() != otherBucket.hashCode()
    }

    /**
     * The bucket name is taken from the first segment of the URI path; an empty path gives null.
     */
    @Unroll
    def 'should determine bucket name' () {
        given:
        def uri = new URI(URI_PATH)

        expect:
        S3Path.bucketName(uri) == BUCKET

        where:
        URI_PATH         || BUCKET
        's3:///'         || null
        's3:///foo'      || 'foo'
        's3:///foo/'     || 'foo'
        's3:///foo/bar'  || 'foo'
    }

    /**
     * normalize() resolves `.` and `..` segments and collapses repeated slashes.
     */
    @Unroll
    def 'should normalise path' () {
        given:
        def normalized = FileHelper.asPath(PATH).normalize()

        expect:
        normalized == FileHelper.asPath(EXPECTED)

        where:
        PATH                        || EXPECTED
        's3://foo'                  || 's3://foo'
        's3://foo/x/y/z.txt'        || 's3://foo/x/y/z.txt'
        's3://foo/x/y/./z.txt'      || 's3://foo/x/y/z.txt'
        's3://foo/x/y/../z.txt'     || 's3://foo/x/z.txt'
        's3://foo/x/y/../../z.txt'  || 's3://foo/z.txt'
        's3://foo/x/y//z.txt'       || 's3://foo/x/y/z.txt'
        's3://foo/./z.txt'          || 's3://foo/z.txt'
    }
}
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.executor
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import nextflow.Session
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.batch.AwsBatchExecutor
|
||||
import nextflow.cloud.aws.batch.AwsOptions
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import nextflow.processor.TaskHandler
|
||||
import nextflow.processor.TaskRun
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class AwsBatchExecutorTest extends Specification {

    /**
     * Fusion can be switched on through the `fusion.enabled` config entry or the
     * FUSION_ENABLED environment variable; the config entry wins when both are given.
     */
    def 'should check is fusion' () {
        given:
        SysEnv.push(ENV)
        and:
        def sess = Mock(Session) {
            getConfig() >> CONFIG
        }
        def executor = new AwsBatchExecutor(session: sess)

        expect:
        executor.isFusionEnabled() == EXPECTED

        cleanup:
        SysEnv.pop()

        where:
        CONFIG                     | ENV                       | EXPECTED
        [:]                        | [:]                       | false
        [fusion:[enabled: true]]   | [:]                       | true
        [fusion:[enabled: false]]  | [FUSION_ENABLED:'true']   | false      // <-- config has priority
        [:]                        | [FUSION_ENABLED:'true']   | true
        [:]                        | [FUSION_ENABLED:'false']  | false
    }

    /**
     * shouldDeleteJob returns true the first time a job id is seen and false on every later call.
     */
    def 'should validate shouldDeleteJob method' () {
        given:
        def executor = Spy(AwsBatchExecutor)

        expect:
        executor.shouldDeleteJob('job-1')
        executor.shouldDeleteJob('job-2')
        executor.shouldDeleteJob('job-3')
        and:
        !executor.shouldDeleteJob('job-1')
        !executor.shouldDeleteJob('job-1')
        !executor.shouldDeleteJob('job-2')
        !executor.shouldDeleteJob('job-2')
        !executor.shouldDeleteJob('job-3')
        !executor.shouldDeleteJob('job-3')
    }

    /**
     * Job arrays are indexed through AWS_BATCH_JOB_ARRAY_INDEX, starting at zero.
     */
    def 'should get array index variable and start' () {
        given:
        def executor = Spy(AwsBatchExecutor)
        expect:
        executor.getArrayIndexName() == 'AWS_BATCH_JOB_ARRAY_INDEX'
        executor.getArrayIndexStart() == 0
    }

    /**
     * The id of an array child task is the parent job id and the task index joined by a colon.
     */
    @Unroll
    def 'should get array task id' () {
        given:
        def executor = Spy(AwsBatchExecutor)
        expect:
        executor.getArrayTaskId(JOB_ID, TASK_INDEX) == EXPECTED

        where:
        JOB_ID  | TASK_INDEX  | EXPECTED
        'foo'   | 1           | 'foo:1'
        'bar'   | 2           | 'bar:2'
    }

    /**
     * Helper turning an absolute path such as `/bucket/key` into the matching S3 path.
     */
    protected Path s3(String path) {
        S3PathFactory.parse('s3:/' + path)
    }

    /**
     * The array work dir is rendered as an S3 URI, a Fusion mount path or a plain
     * local path depending on the executor settings.
     *
     * This feature was previously also named 'should get array task id', duplicating
     * the feature above although it exercises getArrayWorkDir.
     */
    @Unroll
    def 'should get array work dir' () {
        given:
        def executor = Spy(AwsBatchExecutor) {
            isFusionEnabled()>>FUSION
            isWorkDirDefaultFS()>>DEFAULT_FS
        }
        and:
        def handler = Mock(TaskHandler) {
            getTask() >> Mock(TaskRun) { getWorkDir() >> WORK_DIR }
        }
        expect:
        executor.getArrayWorkDir(handler) == EXPECTED

        where:
        FUSION  | DEFAULT_FS  | WORK_DIR               | EXPECTED
        false   | false       | s3('/foo/work/dir')    | 's3://foo/work/dir'
        true    | false       | s3('/foo/work/dir')    | '/fusion/s3/foo/work/dir'
        false   | true        | Path.of('/nfs/work')   | '/nfs/work'
    }

    /**
     * The launch command streams `.command.run` from S3 (via the aws cli or s5cmd)
     * when the work dir is remote, and runs it directly for Fusion or a default file system.
     */
    def 'should get array launch command' (){
        given:
        def executor = Spy(AwsBatchExecutor) {
            isFusionEnabled()>>FUSION
            isWorkDirDefaultFS()>>DEFAULT_FS
            getAwsOptions() >> Mock(AwsOptions) {
                getS5cmdPath() >> { S5CMD ? 's5cmd' : null }
                getAwsCli() >> { 'aws' }
            }
        }
        expect:
        executor.getArrayLaunchCommand(TASK_DIR) == EXPECTED

        where:
        FUSION  | DEFAULT_FS  | S5CMD  | TASK_DIR             | EXPECTED
        false   | false       | false  | 's3://foo/work/dir'  | 'bash -o pipefail -c \'trap "[[ -n \\$pid ]] && kill -TERM \\$pid" TERM; trap "{ ret=$?; aws s3 cp --only-show-errors .command.log s3://foo/work/dir/.command.log||true; exit $ret; }" EXIT; aws s3 cp --only-show-errors s3://foo/work/dir/.command.run - | bash > >(tee .command.log) 2>&1 & pid=$!; wait $pid\''
        false   | false       | true   | 's3://foo/work/dir'  | 'bash -o pipefail -c \'trap "[[ -n \\$pid ]] && kill -TERM \\$pid" TERM; trap "{ ret=$?; s5cmd cp .command.log s3://foo/work/dir/.command.log||true; exit $ret; }" EXIT; s5cmd cat s3://foo/work/dir/.command.run | bash > >(tee .command.log) 2>&1 & pid=$!; wait $pid\''
        and:
        true    | false       | false  | '/fusion/work/dir'   | 'bash /fusion/work/dir/.command.run'
        false   | true        | false  | '/nfs/work/dir'      | 'bash /nfs/work/dir/.command.run 2>&1 > /nfs/work/dir/.command.log'
    }

}
|
||||
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.executor
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.batch.AwsBatchFileCopyStrategy
|
||||
import nextflow.cloud.aws.batch.AwsOptions
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import nextflow.processor.TaskBean
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class BashWrapperBuilderWithS3Test extends Specification {
|
||||
|
||||
// Uses a SimpleFileCopyStrategy with an S3 target dir and an output file name that both
// contain a blank, and checks the `unstage_outputs` and `helpers_script` entries of the
// binding produced by BashWrapperBuilder.makeBinding() against the literals below.
def 'should include s3 helpers' () {
|
||||
given:
|
||||
Global.session = Mock(Session) { getConfig() >> [:] }
|
||||
and:
|
||||
def folder = Paths.get('/work/dir')
|
||||
def target = S3PathFactory.parse('s3://some/buck et') // <-- path with blank
|
||||
|
||||
def bean = new TaskBean([
|
||||
name: 'Hello 1',
|
||||
workDir: folder,
|
||||
targetDir: target,
|
||||
scratch: true,
|
||||
outputFiles: ['test.bam','test.bai', 'bla nk.txt'], // <-- file name with blank
|
||||
script: 'echo Hello world!',
|
||||
])
|
||||
|
||||
def copy = new SimpleFileCopyStrategy(bean)
|
||||
|
||||
/*
|
||||
* simple bash run
|
||||
*/
|
||||
when:
|
||||
def binding = new BashWrapperBuilder(bean,copy).makeBinding()
|
||||
then:
|
||||
binding.unstage_outputs == '''\
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d test.bam test.bai bla\\ nk.txt" | sort | uniq); do
|
||||
nxf_s3_upload $name s3://some/buck\\ et
|
||||
done
|
||||
unset IFS
|
||||
'''.stripIndent().rightTrim()
|
||||
|
||||
binding.helpers_script == '''\
|
||||
# aws cli retry config
|
||||
export AWS_RETRY_MODE=standard
|
||||
export AWS_MAX_ATTEMPTS=5
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
|
||||
// Uses an AwsBatchFileCopyStrategy built with a mocked AwsOptions and checks that
// `unstage_outputs` collects the upload commands into the `uploads` array and runs them
// through nxf_parallel, and that `helpers_script` equals the literal below, which
// contains nxf_cp_retry, nxf_parallel and the aws helper functions.
def 'should include s3 helpers and bash lib' () {
|
||||
given:
|
||||
Global.session = Mock(Session) { getConfig() >> [:] }
|
||||
and:
|
||||
def folder = Paths.get('/work/dir')
|
||||
def target = S3PathFactory.parse('s3://some/bucket')
|
||||
|
||||
def bean = new TaskBean([
|
||||
name: 'Hello 1',
|
||||
workDir: folder,
|
||||
targetDir: target,
|
||||
scratch: true,
|
||||
outputFiles: ['test.bam','test.bai'],
|
||||
script: 'echo Hello world!',
|
||||
])
|
||||
|
||||
def copy = new AwsBatchFileCopyStrategy(bean, Mock(AwsOptions))
|
||||
|
||||
/*
|
||||
* simple bash run
|
||||
*/
|
||||
when:
|
||||
def binding = new BashWrapperBuilder(bean,copy).makeBinding()
|
||||
then:
|
||||
binding.unstage_outputs == '''\
|
||||
uploads=()
|
||||
IFS=$'\\n'
|
||||
for name in $(eval "ls -1d test.bam test.bai" | sort | uniq); do
|
||||
uploads+=("nxf_s3_upload '$name' s3://some/bucket")
|
||||
done
|
||||
unset IFS
|
||||
nxf_parallel "${uploads[@]}"
|
||||
'''.stripIndent()
|
||||
|
||||
binding.helpers_script == '''\
|
||||
# bash helper functions
|
||||
nxf_cp_retry() {
|
||||
local max_attempts=1
|
||||
local timeout=10
|
||||
local attempt=0
|
||||
local exitCode=0
|
||||
while (( \$attempt < \$max_attempts ))
|
||||
do
|
||||
if "\$@"
|
||||
then
|
||||
return 0
|
||||
else
|
||||
exitCode=\$?
|
||||
fi
|
||||
if [[ \$exitCode == 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
nxf_sleep \$timeout
|
||||
attempt=\$(( attempt + 1 ))
|
||||
timeout=\$(( timeout * 2 ))
|
||||
done
|
||||
}
|
||||
|
||||
nxf_parallel() {
|
||||
IFS=$'\\n\'
|
||||
local cmd=("$@")
|
||||
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
|
||||
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
|
||||
local i=0
|
||||
local pid=()
|
||||
(
|
||||
set +u
|
||||
while ((i<${#cmd[@]})); do
|
||||
local copy=()
|
||||
for x in "${pid[@]}"; do
|
||||
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
|
||||
# see https://github.com/nextflow-io/nextflow/pull/4050
|
||||
[[ -e /proc/$x ]] && copy+=($x) || wait $x
|
||||
done
|
||||
pid=("${copy[@]}")
|
||||
|
||||
if ((${#pid[@]}>=$max)); then
|
||||
nxf_sleep 0.2
|
||||
else
|
||||
eval "${cmd[$i]}" &
|
||||
pid+=($!)
|
||||
((i+=1))
|
||||
fi
|
||||
done
|
||||
for p in "${pid[@]}"; do
|
||||
wait $p
|
||||
done
|
||||
)
|
||||
unset IFS
|
||||
}
|
||||
|
||||
# aws helper
|
||||
nxf_s3_upload() {
|
||||
local name=$1
|
||||
local s3path=$2
|
||||
if [[ "$name" == - ]]; then
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
|
||||
elif [[ -d "$name" ]]; then
|
||||
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
|
||||
else
|
||||
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
|
||||
fi
|
||||
}
|
||||
|
||||
nxf_s3_download() {
|
||||
local source=$1
|
||||
local target=$2
|
||||
local file_name=$(basename $1)
|
||||
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
|
||||
if [[ $is_dir == 1 ]]; then
|
||||
aws s3 cp --only-show-errors --recursive "$source" "$target"
|
||||
else
|
||||
aws s3 cp --only-show-errors "$source" "$target"
|
||||
fi
|
||||
}
|
||||
|
||||
'''.stripIndent(true)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.executor
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.SysEnv
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import nextflow.fusion.FusionScriptLauncher
|
||||
import nextflow.processor.TaskBean
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class FusionScriptLauncherS3Test extends Specification {

    /**
     * S3 paths are mapped to container paths of the form `/fusion/s3/<bucket>/<key>`.
     */
    def 'should get container mount' () {
        given:
        Global.config = Collections.emptyMap()
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', null)

        when:
        def result = fusion.toContainerMount(S3PathFactory.parse('s3://foo/a/b/c.txt'))
        then:
        result == Path.of('/fusion/s3/foo/a/b/c.txt')

        when:
        result = fusion.toContainerMount(S3PathFactory.parse('s3://foo/a/x/y.txt'))
        then:
        result == Path.of('/fusion/s3/foo/a/x/y.txt')

        when:
        result = fusion.toContainerMount(S3PathFactory.parse('s3://bar/z.txt'))
        then:
        result == Path.of('/fusion/s3/bar/z.txt')

        cleanup:
        // reset the global config so it does not leak into other features
        Global.config = null
    }

    /**
     * An AWS_S3_ENDPOINT defined in the environment is propagated to the Fusion environment.
     */
    def 'should get fusion env with s3 endpoint' () {
        given:
        Global.config = [:]
        and:
        SysEnv.push([AWS_S3_ENDPOINT: 'http://foo.com'])
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_S3_ENDPOINT: 'http://foo.com',
                               FUSION_WORK: '/fusion/s3/foo/work',
                               FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        // reset the global config, as the other features in this spec do
        Global.config = null
        SysEnv.pop()
    }

    /**
     * With `fusion.exportAwsAccessKeys` enabled, the AWS credentials found in the
     * environment are exported to the Fusion environment.
     */
    def 'should get fusion env with aws credentials' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'xxx', AWS_SECRET_ACCESS_KEY: 'zzz'])
        and:
        Global.config = [fusion: [exportAwsAccessKeys: true]]
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_ACCESS_KEY_ID: 'xxx',
                               AWS_SECRET_ACCESS_KEY: 'zzz',
                               FUSION_WORK: '/fusion/s3/foo/work',
                               FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        Global.config = null
        SysEnv.pop()
    }

    /**
     * With `fusion.exportAwsAccessKeys` enabled and an empty environment, the credentials
     * and the endpoint are taken from the `aws` section of the Nextflow config.
     */
    def 'should get fusion env with aws credentials in nextflow config' () {
        given:
        SysEnv.push([:])
        and:
        def CONFIG = [fusion: [exportAwsAccessKeys: true], aws: [accessKey: 'k1', secretKey: 's1', client: [endpoint: 'http://minio.com']]]
        Global.config = CONFIG
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_ACCESS_KEY_ID: 'k1',
                               AWS_SECRET_ACCESS_KEY: 's1',
                               AWS_S3_ENDPOINT: 'http://minio.com',
                               FUSION_WORK: '/fusion/s3/foo/work',
                               FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        Global.config = null
        SysEnv.pop()
    }

}
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.extension
|
||||
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
|
||||
import groovyx.gpars.dataflow.DataflowReadChannel
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import test.BaseSpec
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class PublishOpS3Test extends BaseSpec {

    /**
     * getTaskDir returns the task directory (two levels below the session work dir or
     * bucket dir) for a file nested inside it, and null when the file is not nested deep enough.
     */
    def 'should infer task dir' () {
        given:
        Global.config = Collections.emptyMap()
        and:
        def workDir = '/some/work/dir' as Path
        def bucketDir = 's3://other/bucket/dir' as Path
        and:
        def session = Mock(Session) {
            getWorkDir() >> workDir
            getBucketDir() >> bucketDir
        }
        and:
        def op = new PublishOp(session, 'foo', Mock(DataflowReadChannel), [to:'/target'])

        when: 'the file lives under the local work dir'
        def taskDir = op.getTaskDir( workDir.resolve('xx/yyyy/this/and/that.txt') )
        then:
        taskDir == Paths.get('/some/work/dir/xx/yyyy')

        when: 'the file lives under the bucket dir'
        taskDir = op.getTaskDir( bucketDir.resolve('pp/qqqq/other/file.fasta') )
        then:
        taskDir == 's3://other/bucket/dir/pp/qqqq' as Path

        when: 'the file sits directly below the first directory level'
        taskDir = op.getTaskDir( workDir.resolve('xx/foo.txt') )
        then:
        taskDir == null
    }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.file
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.SysEnv
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class FileHelperS3Test extends Specification {

    /**
     * With NXF_FILE_ROOT pointing at an S3 location, relative values are resolved
     * against it while absolute values are left as local paths.
     */
    @Unroll
    def 'should convert to canonical path with base' () {
        given:
        SysEnv.push([NXF_FILE_ROOT: 's3://host.com/work'])

        expect:
        FileHelper.toCanonicalPath(VALUE) == EXPECTED

        cleanup:
        SysEnv.pop()

        where:
        VALUE                 || EXPECTED
        null                  || null
        'file.txt'            || FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        Path.of('file.txt')   || FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        and:
        './file.txt'          || FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        '.'                   || FileSystemPathFactory.parse('s3://host.com/work')
        './'                  || FileSystemPathFactory.parse('s3://host.com/work')
        '../file.txt'         || FileSystemPathFactory.parse('s3://host.com/file.txt')
        and:
        '/file.txt'           || Path.of('/file.txt')
        Path.of('/file.txt')  || Path.of('/file.txt')
    }

    /**
     * The canonical form of an S3 URI collapses repeated slashes inside the key.
     */
    def 'should convert to a canonical path' () {
        given:
        Global.session = Mock(Session) { getConfig() >> [:] }
        and:
        def canonical = FileHelper.toCanonicalPath(VALUE)

        expect:
        canonical.toUri() == EXPECTED

        where:
        VALUE                       || EXPECTED
        's3://foo/some/file.txt'    || new URI('s3:///foo/some/file.txt')
        's3://foo/some///file.txt'  || new URI('s3:///foo/some/file.txt')
    }

    /**
     * asPath drops consecutive slashes found anywhere in the S3 path.
     */
    @Unroll
    def 'should remove consecutive slashes in the path' () {
        given:
        Global.session = Mock(Session) { getConfig() >> [:] }
        and:
        def s3Path = FileHelper.asPath(STR)

        expect:
        s3Path.toUri() == EXPECTED

        where:
        STR                      || EXPECTED
        's3://foo//this/that'    || new URI('s3:///foo/this/that')
        's3://foo//this///that'  || new URI('s3:///foo/this/that')
    }
}
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.processor
|
||||
|
||||
import java.nio.file.FileSystems
|
||||
import java.nio.file.Files
|
||||
|
||||
import nextflow.Global
|
||||
import nextflow.Session
|
||||
import nextflow.cloud.aws.nio.S3Path
|
||||
import nextflow.file.FileHelper
|
||||
import spock.lang.Specification
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class PublishDirS3Test extends Specification {

    /**
     * Publishing from the default file system to an S3 target with mode `symlink`
     * makes validatePublishMode() switch the mode to COPY.
     */
    def 'should change mode to `copy`' () {

        given:
        def targetDir = FileHelper.asPath( 's3://bucket/work' )
        def publisher = new PublishDir(mode:'symlink', path: targetDir, sourceFileSystem: FileSystems.default)

        when:
        publisher.validatePublishMode()
        then:
        publisher.mode == PublishDir.Mode.COPY
    }

    /**
     * The tags configured on the publisher are attached to the S3 destination path
     * handed to safeProcessFile.
     */
    def 'should tag files' () {

        given:
        def folder = Files.createTempDirectory('test')
        def source = folder.resolve('hello.txt'); source.text = 'Hello'
        and:
        def targetDir = FileHelper.asPath( 's3://bucket/work' )
        def publisher = new PublishDir(tags: [FOO:'this',BAR:'that'], path: targetDir, sourceFileSystem: FileSystems.default)
        def spy = Spy(publisher)

        when:
        spy.apply1(source, true)
        then:
        // the stubbed interaction intercepts the call, so the closure only inspects the destination path
        1 * spy.safeProcessFile(source, _) >> { sourceFile, s3File ->
            assert s3File instanceof S3Path
            assert (s3File as S3Path).getTagsList().find{ it.key()=='FOO'}.value() == 'this'
            assert (s3File as S3Path).getTagsList().find{ it.key()=='BAR'}.value() == 'that'
        }

        cleanup:
        folder?.deleteDir()
    }

}
|
||||
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.util
|
||||
|
||||
import nextflow.cloud.aws.util.ConfigParser
|
||||
import spock.lang.Specification
|
||||
import spock.lang.Unroll
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
class ConfigParserTest extends Specification {
|
||||
|
||||
def 'should parse section' () {
|
||||
given:
|
||||
def parser = new ConfigParser()
|
||||
|
||||
expect:
|
||||
parser.parseSection(LINE) == EXPECTED
|
||||
|
||||
where:
|
||||
LINE | EXPECTED
|
||||
'foo' | null
|
||||
'[foo' | null
|
||||
and:
|
||||
'[foo]' | 'foo'
|
||||
'[profile foo]' | 'foo'
|
||||
}
|
||||
|
||||
def 'should parse config' () {
|
||||
given:
|
||||
def parser = new ConfigParser()
|
||||
def CONFIG = '''
|
||||
[foo]
|
||||
one = 1
|
||||
two = 2
|
||||
[bar]
|
||||
alpha = 3
|
||||
gamma = 4
|
||||
'''.stripIndent()
|
||||
|
||||
when:
|
||||
parser.parseConfig(CONFIG)
|
||||
|
||||
then:
|
||||
parser.content.size() == 2
|
||||
parser.content['foo'] == ['one = 1', 'two = 2']
|
||||
parser.content['bar'] == ['alpha = 3', 'gamma = 4']
|
||||
}
|
||||
|
||||
def 'should not merge overlapping keys' () {
|
||||
given:
|
||||
def parser = new ConfigParser()
|
||||
def CONFIG1 = '''
|
||||
[alpha]
|
||||
a1=1
|
||||
[beta]
|
||||
b2=2
|
||||
b3=3
|
||||
'''.stripIndent()
|
||||
|
||||
def CONFIG2 = '''
|
||||
[beta]
|
||||
b3=30
|
||||
b4=4
|
||||
'''.stripIndent()
|
||||
|
||||
|
||||
when:
|
||||
parser.parseConfig(CONFIG1)
|
||||
parser.parseConfig(CONFIG2)
|
||||
|
||||
then:
|
||||
parser.content.size() == 2
|
||||
and:
|
||||
parser.content['alpha'] == ['a1=1']
|
||||
parser.content['beta'] == ['b2=2','b3=3','b4=4']
|
||||
|
||||
}
|
||||
|
||||
def 'should load and merge config' () {
|
||||
given:
|
||||
def parser = new ConfigParser()
|
||||
def CONFIG1 = '''
|
||||
[alpha]
|
||||
a1
|
||||
'''.stripIndent()
|
||||
|
||||
def CONFIG2 = '''
|
||||
[beta]
|
||||
b1
|
||||
'''.stripIndent()
|
||||
|
||||
def CONFIG3 = '''
|
||||
[alpha]
|
||||
a2
|
||||
|
||||
[beta]
|
||||
b2
|
||||
|
||||
[omega]
|
||||
z9
|
||||
'''.stripIndent()
|
||||
|
||||
when:
|
||||
parser.parseConfig(CONFIG1)
|
||||
parser.parseConfig(CONFIG2)
|
||||
parser.parseConfig(CONFIG3)
|
||||
|
||||
then:
|
||||
parser.content.size() == 3
|
||||
and:
|
||||
parser.content['alpha'] == ['a1','a2']
|
||||
parser.content['beta'] == ['b1','b2']
|
||||
parser.content['omega'] == ['z9']
|
||||
|
||||
expect:
|
||||
parser.text() == '''\
|
||||
[alpha]
|
||||
a1
|
||||
a2
|
||||
[beta]
|
||||
b1
|
||||
b2
|
||||
[omega]
|
||||
z9
|
||||
'''.stripIndent()
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Unroll
|
||||
def 'should match key' () {
|
||||
given:
|
||||
def parser = new ConfigParser()
|
||||
|
||||
expect:
|
||||
parser.findKey(LINE) == EXPECTED
|
||||
|
||||
where:
|
||||
LINE | EXPECTED
|
||||
'foo' | null
|
||||
'foo=' | 'foo'
|
||||
'foo=1' | 'foo'
|
||||
' foo = 1 ' | 'foo'
|
||||
' foo =1 ' | 'foo'
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nextflow.util
|
||||
|
||||
import nextflow.cloud.aws.util.S3PathFactory
|
||||
import spock.lang.Specification
|
||||
/**
|
||||
*
|
||||
* @author Ben Sherman <bentshermann@gmail.com>
|
||||
*/
|
||||
class S3PathSerializerTest extends Specification {
|
||||
|
||||
def 'should serialise s3 path' () {
|
||||
when:
|
||||
def path = S3PathFactory.parse('s3://mybucket/file.txt')
|
||||
def buffer = KryoHelper.serialize(path)
|
||||
then:
|
||||
KryoHelper.deserialize(buffer).getClass().getName() == 'nextflow.cloud.aws.nio.S3Path'
|
||||
KryoHelper.deserialize(buffer) == S3PathFactory.parse('s3://mybucket/file.txt')
|
||||
}
|
||||
|
||||
def 'should serialise s3 path with spaces' () {
|
||||
when:
|
||||
def path = S3PathFactory.parse('s3://mybucket/file with spaces.txt')
|
||||
def buffer = KryoHelper.serialize(path)
|
||||
then:
|
||||
KryoHelper.deserialize(buffer).getClass().getName() == 'nextflow.cloud.aws.nio.S3Path'
|
||||
KryoHelper.deserialize(buffer) == S3PathFactory.parse('s3://mybucket/file with spaces.txt')
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
#
|
||||
# Copyright 2013-2026, Seqera Labs
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
max_copy_size = 10000000
|
||||
upload_chunk_size = 5242880
|
||||
@@ -0,0 +1,31 @@
|
||||
<!--
|
||||
~ Copyright 2013-2026, Seqera Labs
|
||||
~
|
||||
~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~ you may not use this file except in compliance with the License.
|
||||
~ You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} – %m%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<logger name="org.apache.http" level="INFO" />
|
||||
<logger name="software.amazon" level="INFO" />
|
||||
<logger name="com.upplication" level="DEBUG" />
|
||||
|
||||
<root level="DEBUG">
|
||||
<appender-ref ref="STDOUT"/>
|
||||
</root>
|
||||
</configuration>
|
||||
85
nextflow/plugins/nf-azure/README.md
Normal file
85
nextflow/plugins/nf-azure/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# Microsoft Azure plugin for Nextflow
|
||||
|
||||
## Summary
|
||||
|
||||
The Microsoft Azure plugin provides support for Azure Blob Storage as a file system, and Azure Batch as a compute executor for Nextflow pipelines.
|
||||
|
||||
## Get Started
|
||||
|
||||
To use this plugin, add it to your `nextflow.config`:
|
||||
|
||||
```groovy
|
||||
plugins {
|
||||
id 'nf-azure'
|
||||
}
|
||||
```
|
||||
|
||||
Configure your Azure credentials and services:
|
||||
|
||||
```groovy
|
||||
azure {
|
||||
storage {
|
||||
accountName = '<YOUR STORAGE ACCOUNT NAME>'
|
||||
accountKey = '<YOUR STORAGE ACCOUNT KEY>'
|
||||
}
|
||||
|
||||
batch {
|
||||
endpoint = 'https://<YOUR BATCH ACCOUNT NAME>.<REGION>.batch.azure.com'
|
||||
accountName = '<YOUR BATCH ACCOUNT NAME>'
|
||||
accountKey = '<YOUR BATCH ACCOUNT KEY>'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Set the executor and work directory:
|
||||
|
||||
```groovy
|
||||
process.executor = 'azurebatch'
|
||||
workDir = 'az://<YOUR CONTAINER>/work'
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Azure Batch Configuration
|
||||
|
||||
```groovy
|
||||
plugins {
|
||||
id 'nf-azure'
|
||||
}
|
||||
|
||||
azure {
|
||||
storage {
|
||||
accountName = 'mystorageaccount'
|
||||
accountKey = System.getenv('AZURE_STORAGE_KEY')
|
||||
}
|
||||
|
||||
batch {
|
||||
endpoint = 'https://mybatchaccount.westeurope.batch.azure.com'
|
||||
accountName = 'mybatchaccount'
|
||||
accountKey = System.getenv('AZURE_BATCH_KEY')
|
||||
autoPoolMode = true
|
||||
deletePoolsOnCompletion = true
|
||||
}
|
||||
}
|
||||
|
||||
process.executor = 'azurebatch'
|
||||
workDir = 'az://mycontainer/work'
|
||||
```
|
||||
|
||||
### Using Managed Identity
|
||||
|
||||
```groovy
|
||||
azure {
|
||||
managedIdentity {
|
||||
clientId = '<YOUR MANAGED IDENTITY CLIENT ID>'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
- [Azure Batch Executor Documentation](https://nextflow.io/docs/latest/azure.html)
|
||||
|
||||
## License
|
||||
|
||||
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
||||
1
nextflow/plugins/nf-azure/VERSION
Normal file
1
nextflow/plugins/nf-azure/VERSION
Normal file
@@ -0,0 +1 @@
|
||||
1.22.2
|
||||
116
nextflow/plugins/nf-azure/azure-login.json
Normal file
116
nextflow/plugins/nf-azure/azure-login.json
Normal file
@@ -0,0 +1,116 @@
|
||||
[
|
||||
{
|
||||
"cloudName": "AzureCloud",
|
||||
"homeTenantId": "7005851f-400b-4acb-8bc1-12c44a7d39e5",
|
||||
"id": "cb4ff255-ac8c-4721-83bd-2d98e75b50d7",
|
||||
"isDefault": true,
|
||||
"managedByTenants": [],
|
||||
"name": "Free Trial",
|
||||
"state": "Enabled",
|
||||
"tenantId": "7005851f-400b-4acb-8bc1-12c44a7d39e5",
|
||||
"user": {
|
||||
"name": "paolo@seqera.io",
|
||||
"type": "user"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
$ az login
|
||||
$ az group create --name my-storage-group --location westeurope
|
||||
$ az storage account create --resource-group my-resource-group --name nfaccount --location westeurope
|
||||
|
||||
{- Finished ..
|
||||
"accessTier": "Hot",
|
||||
"azureFilesIdentityBasedAuthentication": null,
|
||||
"blobRestoreStatus": null,
|
||||
"creationTime": "2020-05-15T20:42:17.206927+00:00",
|
||||
"customDomain": null,
|
||||
"enableHttpsTrafficOnly": true,
|
||||
"encryption": {
|
||||
"keySource": "Microsoft.Storage",
|
||||
"keyVaultProperties": null,
|
||||
"services": {
|
||||
"blob": {
|
||||
"enabled": true,
|
||||
"keyType": "Account",
|
||||
"lastEnabledTime": "2020-05-15T20:42:17.300678+00:00"
|
||||
},
|
||||
"file": {
|
||||
"enabled": true,
|
||||
"keyType": "Account",
|
||||
"lastEnabledTime": "2020-05-15T20:42:17.300678+00:00"
|
||||
},
|
||||
"queue": null,
|
||||
"table": null
|
||||
}
|
||||
},
|
||||
"failoverInProgress": null,
|
||||
"geoReplicationStats": null,
|
||||
"id": "/subscriptions/cb4ff255-ac8c-4721-83bd-2d98e75b50d7/resourceGroups/my-resource-group/providers/Microsoft.Storage/storageAccounts/nfaccount",
|
||||
"identity": null,
|
||||
"isHnsEnabled": null,
|
||||
"kind": "StorageV2",
|
||||
"largeFileSharesState": null,
|
||||
"lastGeoFailoverTime": null,
|
||||
"location": "westeurope",
|
||||
"name": "nfaccount",
|
||||
"networkRuleSet": {
|
||||
"bypass": "AzureServices",
|
||||
"defaultAction": "Allow",
|
||||
"ipRules": [],
|
||||
"virtualNetworkRules": []
|
||||
},
|
||||
"primaryEndpoints": {
|
||||
"blob": "https://nfaccount.blob.core.windows.net/",
|
||||
"dfs": "https://nfaccount.dfs.core.windows.net/",
|
||||
"file": "https://nfaccount.file.core.windows.net/",
|
||||
"internetEndpoints": null,
|
||||
"microsoftEndpoints": null,
|
||||
"queue": "https://nfaccount.queue.core.windows.net/",
|
||||
"table": "https://nfaccount.table.core.windows.net/",
|
||||
"web": "https://nfaccount.z6.web.core.windows.net/"
|
||||
},
|
||||
"primaryLocation": "westeurope",
|
||||
"privateEndpointConnections": [],
|
||||
"provisioningState": "Succeeded",
|
||||
"resourceGroup": "my-resource-group",
|
||||
"routingPreference": null,
|
||||
"secondaryEndpoints": {
|
||||
"blob": "https://nfaccount-secondary.blob.core.windows.net/",
|
||||
"dfs": "https://nfaccount-secondary.dfs.core.windows.net/",
|
||||
"file": null,
|
||||
"internetEndpoints": null,
|
||||
"microsoftEndpoints": null,
|
||||
"queue": "https://nfaccount-secondary.queue.core.windows.net/",
|
||||
"table": "https://nfaccount-secondary.table.core.windows.net/",
|
||||
"web": "https://nfaccount-secondary.z6.web.core.windows.net/"
|
||||
},
|
||||
"secondaryLocation": "northeurope",
|
||||
"sku": {
|
||||
"name": "Standard_RAGRS",
|
||||
"tier": "Standard"
|
||||
},
|
||||
"statusOfPrimary": "available",
|
||||
"statusOfSecondary": "available",
|
||||
"tags": {},
|
||||
"type": "Microsoft.Storage/storageAccounts"
|
||||
}
|
||||
|
||||
|
||||
Connection string
|
||||
BlobEndpoint=https://nfaccount.blob.core.windows.net/;QueueEndpoint=https://nfaccount.queue.core.windows.net/;FileEndpoint=https://nfaccount.file.core.windows.net/;TableEndpoint=https://nfaccount.table.core.windows.net/;SharedAccessSignature=sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
|
||||
SAS token
|
||||
?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
|
||||
Blob service SAS URL
|
||||
https://nfaccount.blob.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
|
||||
File service SAS URL
|
||||
https://nfaccount.file.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
|
||||
Queue service SAS URL
|
||||
https://nfaccount.queue.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
|
||||
Table service SAS URL
|
||||
https://nfaccount.table.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
|
||||
104
nextflow/plugins/nf-azure/azure-login.txt
Normal file
104
nextflow/plugins/nf-azure/azure-login.txt
Normal file
@@ -0,0 +1,104 @@
|
||||
» az login
|
||||
|
||||
» az group create --name nf-storage-group --location westeurope
|
||||
{
|
||||
"id": "/subscriptions/f7ef67b9-51f5-4fc2-91a8-0f9cce0c6598/resourceGroups/nf-storage-group",
|
||||
"location": "westeurope",
|
||||
"managedBy": null,
|
||||
"name": "nf-storage-group",
|
||||
"properties": {
|
||||
"provisioningState": "Succeeded"
|
||||
},
|
||||
"tags": null,
|
||||
"type": "Microsoft.Resources/resourceGroups"
|
||||
}
|
||||
|
||||
» az storage account create --resource-group nf-storage-group --name nfstore --location westeurope
|
||||
{- Finished ..
|
||||
"accessTier": "Hot",
|
||||
"allowBlobPublicAccess": null,
|
||||
"azureFilesIdentityBasedAuthentication": null,
|
||||
"blobRestoreStatus": null,
|
||||
"creationTime": "2020-07-18T07:52:22.585318+00:00",
|
||||
"customDomain": null,
|
||||
"enableHttpsTrafficOnly": true,
|
||||
"encryption": {
|
||||
"keySource": "Microsoft.Storage",
|
||||
"keyVaultProperties": null,
|
||||
"requireInfrastructureEncryption": null,
|
||||
"services": {
|
||||
"blob": {
|
||||
"enabled": true,
|
||||
"keyType": "Account",
|
||||
"lastEnabledTime": "2020-07-18T07:52:22.679222+00:00"
|
||||
},
|
||||
"file": {
|
||||
"enabled": true,
|
||||
"keyType": "Account",
|
||||
"lastEnabledTime": "2020-07-18T07:52:22.679222+00:00"
|
||||
},
|
||||
"queue": null,
|
||||
"table": null
|
||||
}
|
||||
},
|
||||
"failoverInProgress": null,
|
||||
"geoReplicationStats": null,
|
||||
"id": "/subscriptions/f7ef67b9-51f5-4fc2-91a8-0f9cce0c6598/resourceGroups/nf-storage-group/providers/Microsoft.Storage/storageAccounts/nfstore",
|
||||
"identity": null,
|
||||
"isHnsEnabled": null,
|
||||
"kind": "StorageV2",
|
||||
"largeFileSharesState": null,
|
||||
"lastGeoFailoverTime": null,
|
||||
"location": "westeurope",
|
||||
"minimumTlsVersion": null,
|
||||
"name": "nfstore",
|
||||
"networkRuleSet": {
|
||||
"bypass": "AzureServices",
|
||||
"defaultAction": "Allow",
|
||||
"ipRules": [],
|
||||
"virtualNetworkRules": []
|
||||
},
|
||||
"primaryEndpoints": {
|
||||
"blob": "https://nfstore.blob.core.windows.net/",
|
||||
"dfs": "https://nfstore.dfs.core.windows.net/",
|
||||
"file": "https://nfstore.file.core.windows.net/",
|
||||
"internetEndpoints": null,
|
||||
"microsoftEndpoints": null,
|
||||
"queue": "https://nfstore.queue.core.windows.net/",
|
||||
"table": "https://nfstore.table.core.windows.net/",
|
||||
"web": "https://nfstore.z6.web.core.windows.net/"
|
||||
},
|
||||
"primaryLocation": "westeurope",
|
||||
"privateEndpointConnections": [],
|
||||
"provisioningState": "Succeeded",
|
||||
"resourceGroup": "nf-storage-group",
|
||||
"routingPreference": null,
|
||||
"secondaryEndpoints": {
|
||||
"blob": "https://nfstore-secondary.blob.core.windows.net/",
|
||||
"dfs": "https://nfstore-secondary.dfs.core.windows.net/",
|
||||
"file": null,
|
||||
"internetEndpoints": null,
|
||||
"microsoftEndpoints": null,
|
||||
"queue": "https://nfstore-secondary.queue.core.windows.net/",
|
||||
"table": "https://nfstore-secondary.table.core.windows.net/",
|
||||
"web": "https://nfstore-secondary.z6.web.core.windows.net/"
|
||||
},
|
||||
"secondaryLocation": "northeurope",
|
||||
"sku": {
|
||||
"name": "Standard_RAGRS",
|
||||
"tier": "Standard"
|
||||
},
|
||||
"statusOfPrimary": "available",
|
||||
"statusOfSecondary": "available",
|
||||
"tags": {},
|
||||
"type": "Microsoft.Storage/storageAccounts"
|
||||
}
|
||||
|
||||
|
||||
|
||||
az storage blob generate-sas \
|
||||
--account-name nfstore \
|
||||
--container-name my-data \
|
||||
--name MyBlob \
|
||||
--permissions racdw \
|
||||
--expiry 2021-06-15
|
||||
74
nextflow/plugins/nf-azure/build.gradle
Normal file
74
nextflow/plugins/nf-azure/build.gradle
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
plugins {
|
||||
id 'io.nextflow.nextflow-plugin' version "${nextflowPluginVersion}"
|
||||
id 'java-test-fixtures'
|
||||
}
|
||||
|
||||
nextflowPlugin {
|
||||
nextflowVersion = '25.11.0-edge'
|
||||
|
||||
provider = "${nextflowPluginProvider}"
|
||||
description = 'Enables Azure cloud execution through Batch service with native Blob storage access and comprehensive authentication options'
|
||||
className = 'nextflow.cloud.azure.AzurePlugin'
|
||||
useDefaultDependencies = false
|
||||
generateSpec = false
|
||||
extensionPoints = [
|
||||
'nextflow.cloud.azure.batch.AzBatchExecutor',
|
||||
'nextflow.cloud.azure.config.AzConfig',
|
||||
'nextflow.cloud.azure.file.AzPathFactory',
|
||||
'nextflow.cloud.azure.file.AzPathSerializer',
|
||||
'nextflow.cloud.azure.fusion.AzFusionEnv',
|
||||
]
|
||||
}
|
||||
|
||||
sourceSets {
|
||||
main.java.srcDirs = []
|
||||
main.groovy.srcDirs = ['src/main']
|
||||
main.resources.srcDirs = ['src/resources']
|
||||
test.groovy.srcDirs = ['src/test']
|
||||
test.java.srcDirs = ['src/testResources']
|
||||
test.resources.srcDirs = []
|
||||
}
|
||||
|
||||
configurations {
|
||||
// see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies
|
||||
runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
}
|
||||
|
||||
dependencies {
|
||||
compileOnly project(':nextflow')
|
||||
compileOnly 'org.slf4j:slf4j-api:2.0.17'
|
||||
compileOnly 'org.pf4j:pf4j:3.14.1'
|
||||
api('com.azure:azure-storage-blob:12.33.2') {
|
||||
exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
}
|
||||
api('com.azure:azure-compute-batch:1.0.0-beta.3') {
|
||||
exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
exclude group: 'com.google.guava', module: 'guava'
|
||||
}
|
||||
api('com.azure:azure-identity:1.18.2') {
|
||||
exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
}
|
||||
|
||||
// Force patched version to address GHSA-72hv-8253-57qq (jackson-core Number Length Constraint Bypass DoS)
|
||||
runtimeOnly 'com.fasterxml.jackson.core:jackson-core:2.18.6'
|
||||
|
||||
testImplementation(testFixtures(project(":nextflow")))
|
||||
testImplementation project(':nextflow')
|
||||
testImplementation "org.apache.groovy:groovy:4.0.31"
|
||||
testImplementation "org.apache.groovy:groovy-nio:4.0.31"
|
||||
}
|
||||
254
nextflow/plugins/nf-azure/changelog.txt
Normal file
254
nextflow/plugins/nf-azure/changelog.txt
Normal file
@@ -0,0 +1,254 @@
|
||||
nf-azure changelog
|
||||
===================
|
||||
1.22.2 - 26 Mar 2026
|
||||
- Fix netty and jackson vulnerabilities (#6955) [8dafdd95d]
|
||||
- Fix security vulnerabilities (#6938) [8b1ab40c4]
|
||||
|
||||
1.22.1 - 17 Mar 2026
|
||||
- Record types (#6679) [d54ff29af]
|
||||
|
||||
1.22.0 - 28 Feb 2026
|
||||
- Handle Azure Batch ActiveJobAndScheduleQuotaReached with retry (#6874) [6e66aaa58]
|
||||
- Update default Azure Batch VM image to Ubuntu 24.04 (#6844) [b621fc7cb]
|
||||
|
||||
1.21.0 - 28 Nov 2025
|
||||
- Optimize exit code handling by relying on scheduler status for successful executions (#6484) [454a2ae85]
|
||||
|
||||
1.20.2 - 21 Oct 2025
|
||||
- Rename `config.schema` package to `config.spec` (#6485) [ef0d2d601]
|
||||
|
||||
1.20.1 - 8 Oct 2025
|
||||
- Fix unstage controls in command.run when using storeDir (#6364) [a5756da3e]
|
||||
|
||||
1.19.0 - 15 Aug 2025
|
||||
- Fix Azure Batch startTask concatenation issue (#6300) (#6305) [ci fast] [81d5c0dc]
|
||||
- Unify nf-lang config scopes with runtime classes (#6271) [bfa67ca3]
|
||||
- Update Azure and AWS deps (#6343) [ci fast] [ff00e2de]
|
||||
- Bump groovy 4.0.28 (#6304) [ci fast] [a468f8ef]
|
||||
- Bump netty-codec-http2:4.1.124.Final [7e690b44]
|
||||
|
||||
1.18.0 - 6 Jun 2025
|
||||
- Allow users to provide implicit managed identity to Azure Batch (#6144) [d1f70f50]
|
||||
- Minor Azure Batch disk slot calculation demoted to debug (#6234) [ci skip] [c65955ce]
|
||||
- Bump Slf4j version 2.0.17 [93199e09]
|
||||
|
||||
1.17.0 - 2 Jun 2025
|
||||
- Add support for Azure Managed identities on Azure worker nodes with Fusion (#6118) [37981a5f]
|
||||
- Bump Groovy to version 4.0.27 (#6125) [258e1790]
|
||||
|
||||
1.16.0 - 8 May 2025
|
||||
- Add azure.batch.jobMaxWallClockTime config option (#5996) [74963fdc]
|
||||
- Remove test constructors or mark as TestOnly (#5216) [d4fadd42]
|
||||
|
||||
1.15.0 - 23 Apr 2025
|
||||
- Update Azure Batch VM sizes and regions (#5985) [297150b8]
|
||||
|
||||
1.14.1 - 19 Mar 2025
|
||||
- Fix handling of exit status with Azure Batch and Fusion (#5806) [7085862d]
|
||||
- Removing Azure vmList from log [67ffc8ab]
|
||||
|
||||
1.14.0 - 17 Mar 2025
|
||||
- Add cpu-shares and memory limits to Azure Batch tasks (#5799) [f9c0cbfd]
|
||||
- Add disk directive support in Azure Batch (#5784) [113d7250]
|
||||
- Validates Azure region before checking available VMs (#5108) [080893a2]
|
||||
- Fix Ignore Azure pool already exists error (#5721) [e267961b]
|
||||
- Bump Ubuntu 22.04 as default SKU for Azure Batch (#5804) [e0ba536d]
|
||||
- Bump groovy 4.0.26 [f740bc56]
|
||||
|
||||
1.13.0 - 12 Feb 2025
|
||||
- Allow Azure Batch tasks to be submitted to different pools (#5766) [76790d2a]
|
||||
- Fix Check for number of low priority nodes in Azure Batch before raising a pool resize error (#5576) [9b528c11]
|
||||
- Update azure deps [b163da95]
|
||||
- Bump groovy 4.0.25 [19c40a4a]
|
||||
- Bump io.netty:netty-handler:4.1.118.Final [db4a9037]
|
||||
- Bump net.minidev:json-smart:2.5.2 [b5c4faf4]
|
||||
- Bump netty-common:4.1.118.Final [8574e243]
|
||||
|
||||
1.12.0 - 20 Jan 2025
|
||||
- Ensure job is killed when exception in task status check (#5561) [9eefd207]
|
||||
- Bump logback 1.5.13 + slf4j 2.0.16 [cc0163ac]
|
||||
- Bump groovy 4.0.24 missing deps [40670f7e]
|
||||
|
||||
1.11.0 - 3 Dec 2024
|
||||
- Detecting errors in data unstaging (#5345) [3c8e602d]
|
||||
- Bump netty-common to version 4.1.115.Final [d1bbd3d0]
|
||||
- Bump groovy 4.0.24 [dd71ad31]
|
||||
- Bump com.azure:azure-identity from 1.11.3 to 1.12.2 (#5449) [cb70f1df]
|
||||
- Target Java 17 as minimal Java version (#5045) [0140f954]
|
||||
|
||||
1.10.1 - 27 Oct 2024
|
||||
- Demote azure batch task status log level to trace (#5416) [ci skip] [d6c684bb]
|
||||
|
||||
1.10.0 - 2 Oct 2024
|
||||
- Fix Azure Fusion env misses credentials when no key or SAS provided (#5328) [e11382c8]
|
||||
- Bump groovy 4.0.23 (#5303) [fe3e3ac7]
|
||||
|
||||
1.9.0 - 4 Sep 2024
|
||||
- Support Azure Managed Identities in Fusion configuration logic (#5278) [a0bf8b40]
|
||||
|
||||
1.8.1 - 5 Aug 2024
|
||||
- Bump pf4j to version 3.12.0 [96117b9a]
|
||||
|
||||
1.8.0 - 8 Jul 2024
|
||||
- Update Azure VMs and regions for 2024-07-01 (#5100) [12b027ee]
|
||||
- Add retry options to Azure Blob client (#5098) [7d5e5d2b]
|
||||
- Bump groovy 4.0.22 [284a6606]
|
||||
|
||||
1.7.0 - 17 Jun 2024
|
||||
- Add support for Azure managed identity (#4897) [21ca16e6]
|
||||
- Fix Azure system-assigned managed identity [a639a17d]
|
||||
- Fix support for Azure managed identity clientId [306814e7]
|
||||
- Bump azure-compute-batch:1.0.0-beta.2 [c08dc49b]
|
||||
- Bump azure-storage-blob 12.26.1 [c76ff5e7]
|
||||
|
||||
1.6.1 - 1 Aug 2024
|
||||
- Update Azure batch deps [72576648]
|
||||
- Bump pf4j to version 3.12.0 [1a8f086a]
|
||||
|
||||
1.6.0 - 15 Apr 2024
|
||||
- Add support for Azure custom startTask (#4913) [27d01e3a]
|
||||
- Improve control on azcopy install (#4883) [01447d5c]
|
||||
- Fix Azure pool creation [2ee4d11e]
|
||||
- Bump groovy 4.0.21 [9e08390b]
|
||||
|
||||
1.5.1 - 10 Mar 2024
|
||||
- Update Azure dependencies [1bcbaf0d]
|
||||
- Bump groovy 4.0.19 [854dc1f0]
|
||||
|
||||
1.5.0 - 5 Feb 2024
|
||||
- Fix azure retry policy (#4638) [85bab699]
|
||||
- Use AZURE_STORAGE_SAS_TOKEN environment variable (#4627) [2e02afbf]
|
||||
- Bump Groovy 4 (#4443) [9d32503b]
|
||||
|
||||
1.4.0 - 24 Nov 2023
|
||||
- Fix security vulnerabilities (#4513) [a310c777]
|
||||
- Add support for Azure low-priority pool (#4527) [8320ea10]
|
||||
|
||||
1.3.3-patch3 - 31 Jul 2024
|
||||
- Update Azure batch deps [e0c6d77d]
|
||||
|
||||
1.3.3-patch2 - 11 Jun 2024
|
||||
- Fix security vulnerabilities (#5057) [6d8765b8]
|
||||
|
||||
1.3.3-patch1 - 28 May 2024
|
||||
- Bump dependency with Nextflow 23.10.2
|
||||
|
||||
1.3.3 - 12 Jan 2024
|
||||
- Use AZURE_STORAGE_SAS_TOKEN environment variable (#4627) [2e1cb413]
|
||||
- Fix azure retry policy (#4638) [2bc3cf0e]
|
||||
|
||||
1.3.2 - 28 Sep 2023
|
||||
- Retry TimeoutException in azure file system (#4295) [79248355]
|
||||
|
||||
1.3.1 - 10 Sep 2023
|
||||
- Disable staging script for remote work dir (#4282) [80f7cd46]
|
||||
- Fix IOException should be thrown when failing to create Azure directory [b0bdfd79]
|
||||
- Fix security deps in nf-azure plugin [c30d5211]
|
||||
- Bump groovy 3.0.19 [cb411208]
|
||||
|
||||
1.3.0 - 17 Aug 2023
|
||||
- Add resource labels support for Azure Batch (#4178) [7b5e50a1]
|
||||
- Fix typos in source code comments (#4173) [e78bc37e]
|
||||
|
||||
1.2.0 - 5 Aug 2023
|
||||
- Add deleteTasksOnCompletion to Azure Batch configuration (#4114) [b14674dc]
|
||||
|
||||
1.1.4 - 22 Jul 2023
|
||||
- Fix failing test [9a52f848]
|
||||
- Fix Improve error message for invalid Azure URI [0f4d8867]
|
||||
- Fix invalid detection of hierarchical namespace stub blobs as files (#4046) [ce06c877]
|
||||
- Wait for all child processes in nxf_parallel (#4050) [60a5f1a7]
|
||||
- Bump Groovy 3.0.18 [207eb535]
|
||||
|
||||
1.1.3 - 19 Jun 2023
|
||||
- Increase Azure min retry delay to 250ms [2e77e5e4]
|
||||
- Fix AzFileSystem retry policy (2) [c2f3cc96]
|
||||
|
||||
1.1.2 - 19 Jun 2023
|
||||
- Fix AzFileSystem retry policy [ba9b6d18]
|
||||
- Improve Azure retry logging [de58697a]
|
||||
|
||||
1.1.1 - 14 Jun 2023
|
||||
- Add retry policy on Az blob operations [295bc1ff]
|
||||
- Bump azure-storage-blob:12.22.1 [2a36fa77]
|
||||
- Fix S3 path normalization [b75ec444]
|
||||
|
||||
1.1.0 - 15 May 2023
|
||||
- Add support for `time` directive in Azure Batch (#3869) [5c11a0d4]
|
||||
- Increase Azure default maxRetries to 10 [a017139f]
|
||||
- Fix Azure jobs correctly deleted after completion (#3927) [b173a983]
|
||||
- Fix missing SAS token fusion env for Azure [43015029]
|
||||
- Fix failing tests [06337962]
|
||||
- Fix Azure pool creation when using scaling formula (#3868) [79984a87]
|
||||
- Security fixes [973b7bea]
|
||||
- Update logging libraries [d7eae86e]
|
||||
- Bump groovy 3.0.17 [cfe4ba56]
|
||||
|
||||
1.0.1 - 15 Apr 2023
|
||||
- Security fixes [83e8fd6a]
|
||||
- Fix Azure pool creation when using scaling formula (#3868) [84a808a5]
|
||||
|
||||
1.0.0 - 1 Apr 2023
|
||||
- Add support for Fusion to Azure Batch executor (#3209) [3d3cbfa2]
|
||||
- Fix NoSuchMethodError String.stripIndent with Java 11 [308eafe6]
|
||||
|
||||
0.16.0 - 19 Mar 2023
|
||||
- Add azure batch pool virtualNetwork option (#3723) [e3917b8e]
|
||||
- Update Azure VM sizes (#3751) [1d06e9a6]
|
||||
- Increase pwd obfuscation min length [ba23d036]
|
||||
- Bump groovy 3.0.16 [d3ff5dcb]
|
||||
|
||||
0.15.1 - 14 Jan 2023
|
||||
- Improve container native executor configuration [03126371]
|
||||
- Minor logging change [646776a8]
|
||||
- Bump groovy 3.0.14 [7c204236]
|
||||
|
||||
0.15.0 - 23 Nov 2022
|
||||
- Allow identity based authentication on Azure Batch (#3132) [a08611be]
|
||||
- Add Azure SAS token validation [e2244b48]
|
||||
|
||||
0.14.1 - 10 Sep 2022
|
||||
- Fix Azure NPE on missing pool opts [d5c0aabd]
|
||||
- Fix shutdown/cleanup hooks invocation [f4185070]
|
||||
|
||||
0.14.0 - 7 Sep 2022
|
||||
- Fix thread pool race condition on shutdown [8d2b0587]
|
||||
- Update Azure vm types [80f5fbe4]
|
||||
|
||||
0.13.5 - 1 Sep 2022
|
||||
- Get rid of remote bin dir [6cfb51e7]
|
||||
- Fix typos in log messages [76a87c72]
|
||||
- Improve Az Batch err handling and testing [85d31e8d]
|
||||
|
||||
0.13.4 - 1 Aug 2022
|
||||
- Add retry when Azure submit fails with OperationTimedOut [6a3f9742]
|
||||
|
||||
0.13.3 - 13 Jul 2022
|
||||
- Fix escape unstage outputs with double quotes #2912 #2904 #2790 [49ff02a6]
|
||||
|
||||
0.13.2 - 15 May 2022
|
||||
- Update default SKU for Azure Batch 'batch.node.ubuntu 20.04' [be60fc14]
|
||||
|
||||
0.13.1 - 2 Apr 2022
|
||||
- Add retry policy Azure create job [792820a2]
|
||||
|
||||
0.13.0 - 27 Mar 2022
|
||||
- Add azcopy fine grain config settings [3998a56b]
|
||||
- Add retry policy to Az Batch operations [991c6175]
|
||||
|
||||
0.12.0 - 6 Feb 2022
|
||||
- Generate "account" token instead of container token when SAS token is not provided [d5125975d]
|
||||
- Fix upload of nested directory outputs on azure [85ad55225] [744447155]
|
||||
|
||||
0.11.2 - 22 Nov 2021
|
||||
- Fix Azure executor fail to cleanup jobs on completion [533448be4]
|
||||
- Make Azure executor logging less verbose [e0b2117ad]
|
||||
|
||||
0.11.1 - 18 Nov 2021
|
||||
- Fix NPE when pool is not configured and auto pool mode is not specified
|
||||
|
||||
0.11.0 - 12 Oct 2021
|
||||
- Add Azure pool node SKU selection #2360 [9afcac756]
|
||||
- Add Built-in support for Azure File Shares #2285 [a4c3e0ad5]
|
||||
- Add missing information for pulling images from private registry in Azure Batch #2355 [040e190bd]
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2013-2026, Seqera Labs
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package nextflow.cloud.azure
|
||||
|
||||
import groovy.transform.CompileStatic
|
||||
import nextflow.cloud.azure.nio.AzFileSystemProvider
|
||||
import nextflow.file.FileHelper
|
||||
import nextflow.plugin.BasePlugin
|
||||
import org.pf4j.PluginWrapper
|
||||
|
||||
/**
|
||||
* Azure cloud plugin for Nextflow
|
||||
*
|
||||
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
@CompileStatic
|
||||
class AzurePlugin extends BasePlugin {
|
||||
|
||||
AzurePlugin(PluginWrapper wrapper) {
|
||||
super(wrapper)
|
||||
}
|
||||
|
||||
@Override
|
||||
void start() {
|
||||
super.start()
|
||||
// register Azure file system
|
||||
FileHelper.getOrInstallProvider(AzFileSystemProvider)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user