add nextflow d30e48d

This commit is contained in:
2026-04-29 23:01:54 +02:00
parent d0b12d668d
commit 97cc9058d3
2840 changed files with 730250 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
// Base Java plugin for this aggregator project (it builds no jar of its own, see `jar.enabled` below)
apply plugin: 'java'
// Publishing credentials: a Gradle project property takes precedence over the matching environment variable
ext.aws_access_key_id = project.findProperty('aws_access_key_id') ?: System.getenv('AWS_ACCESS_KEY_ID')
ext.aws_secret_access_key = project.findProperty('aws_secret_access_key') ?: System.getenv('AWS_SECRET_ACCESS_KEY')
// Publishing target: the `publish_repo_url` project property, then the `PUBLISH_REPO_URL` environment
// variable, otherwise the snapshots or releases S3 repository depending on whether `version`
// ends with `-SNAPSHOT`
ext.publishRepoUrl = project.findProperty('publish_repo_url') ?: System.getenv('PUBLISH_REPO_URL') ?: ( version.endsWith('-SNAPSHOT') ? "s3://maven.seqera.io/snapshots" : "s3://maven.seqera.io/releases" )
// Do not build a jar for this project
jar.enabled = false
// Configuration shared by all subprojects
subprojects {
apply plugin: 'java'
apply plugin: 'groovy'
apply plugin: 'maven-publish'
// Resolve dependencies from the local Maven repository first, then Maven Central
repositories {
mavenLocal()
mavenCentral()
}
group = 'io.nextflow'
// The version is read from a `VERSION` file, stripped of surrounding whitespace
version = project.file('VERSION').text.trim()
// Disable the tasks that generate Gradle module metadata
tasks.withType(GenerateModuleMetadata) {
enabled = false
}
/*
* Copy the plugin dependencies in the subproject `build/target/libs` directory
*/
task copyPluginLibs(type: Sync) {
group 'nextflow'
from configurations.runtimeClasspath
into 'build/target/libs'
}
/*
* Copy the plugin manifest to resources directory for dev mode discovery.
* In dev mode, pf4j looks for META-INF/MANIFEST.MF in the classpath directories,
* but Gradle only generates it during JAR creation. This task copies the manifest
* to the resources directory after the JAR is built.
*/
task copyPluginManifest(type: Copy) {
group 'nextflow'
from 'build/tmp/jar/MANIFEST.MF'
into 'build/resources/main/META-INF'
// the manifest only exists once the `jar` task has run
dependsOn jar
}
// Ensure manifest is available for test classpath (needed for dev mode plugin discovery)
tasks.matching { it.name == 'test' }.configureEach {
dependsOn copyPluginManifest
}
// Ensure packagePlugin task depends on copyPluginManifest to avoid implicit dependency issues
tasks.matching { it.name == 'packagePlugin' }.configureEach {
dependsOn copyPluginManifest
}
/*
* Publish the jars to the Maven repository on S3
*/
publishing {
publications {
maven(MavenPublication) {
from components.java
// silence the POM metadata warnings for the test fixtures variants
suppressPomMetadataWarningsFor('testFixturesApiElements')
suppressPomMetadataWarningsFor('testFixturesRuntimeElements')
}
}
repositories {
maven {
name = 'Seqera'
url = publishRepoUrl
credentials(AwsCredentials) {
// keys come from the `aws_access_key_id` / `aws_secret_access_key` project properties
// (e.g. the `gradle.properties` file) or from the corresponding environment variables
accessKey aws_access_key_id
secretKey aws_secret_access_key
}
}
}
}
}
/*
* "install" the plugin into the project root build/plugins directory:
* the parent project's `assemble` task is made to depend on this project's `assemble`
*/
project.parent.tasks.getByName("assemble").dependsOn << assemble
/*
* Copies each plugin's required dependencies into the corresponding libs directory
* by making `classes` depend on the `copyPluginLibs` task of every subproject
*/
classes.dependsOn subprojects.copyPluginLibs

View File

@@ -0,0 +1,3 @@
## plugins settings
nextflowPluginVersion=1.0.0-beta.14
nextflowPluginProvider=nextflow-io

View File

@@ -0,0 +1,89 @@
# Amazon Web Services plugin for Nextflow
## Summary
The Amazon Web Services (AWS) plugin provides support for AWS, including AWS Batch as a compute executor, S3 as a file system, and Fusion file system for high-performance data operations.
## Get started
To use this plugin, add it to your `nextflow.config`:
```groovy
plugins {
id 'nf-amazon'
}
```
Configure your AWS credentials using environment variables, AWS CLI profiles, or IAM roles. Then set up the executor and work directory:
```groovy
process.executor = 'awsbatch'
process.queue = '<YOUR BATCH QUEUE>'
workDir = 's3://<YOUR BUCKET>/work'
aws {
region = 'us-east-1'
batch {
cliPath = '/home/ec2-user/miniconda/bin/aws'
}
}
```
## Examples
### Basic AWS Batch configuration
```groovy
plugins {
id 'nf-amazon'
}
process.executor = 'awsbatch'
process.queue = 'my-batch-queue'
workDir = 's3://my-bucket/work'
aws {
region = 'eu-west-1'
batch {
cliPath = '/home/ec2-user/miniconda/bin/aws'
jobRole = 'arn:aws:iam::123456789012:role/MyBatchJobRole'
}
}
```
### Using Fusion file system
```groovy
fusion {
enabled = true
}
wave {
enabled = true
}
process.executor = 'awsbatch'
workDir = 's3://my-bucket/work'
```
### S3 storage options
```groovy
aws {
client {
maxConnections = 20
connectionTimeout = 10000
storageEncryption = 'AES256'
}
region = 'us-east-1'
}
```
## Resources
- [AWS Batch Executor Documentation](https://nextflow.io/docs/latest/aws.html)
- [Amazon S3 Storage Documentation](https://nextflow.io/docs/latest/aws.html#s3-storage)
## License
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)

View File

@@ -0,0 +1 @@
3.9.0

View File

@@ -0,0 +1,83 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Gradle plugins used by this module; the Nextflow plugin version comes from the
// `nextflowPluginVersion` property
plugins {
id 'io.nextflow.nextflow-plugin' version "${nextflowPluginVersion}"
id 'java-test-fixtures'
}
// Settings consumed by the `io.nextflow.nextflow-plugin` Gradle plugin.
// NOTE(review): the exact meaning of each option is defined by that plugin -- confirm against its docs
nextflowPlugin {
// presumably the Nextflow version this plugin targets -- TODO confirm
nextflowVersion = '26.03.4-edge'
provider = "${nextflowPluginProvider}"
description = 'Provides comprehensive AWS cloud integration including Batch executor, S3 file system, and Fusion support for high-performance data operations'
// plugin entry class
className = 'nextflow.cloud.aws.AmazonPlugin'
useDefaultDependencies = false
generateSpec = false
// classes declared as extension points of this plugin
extensionPoints = [
'nextflow.cloud.aws.batch.AwsBatchExecutor',
'nextflow.cloud.aws.config.AwsConfig',
'nextflow.cloud.aws.fusion.AwsFusionEnv',
'nextflow.cloud.aws.mail.AwsMailProvider',
'nextflow.cloud.aws.util.S3PathFactory',
'nextflow.cloud.aws.util.S3PathSerializer',
]
}
// Non-standard source layout: Groovy sources live directly under `src/main` and `src/test`,
// resources under `src/resources` and `src/testResources`; no Java source directories are used
sourceSets {
main.java.srcDirs = []
main.groovy.srcDirs = ['src/main']
main.resources.srcDirs = ['src/resources']
test.groovy.srcDirs = ['src/test']
test.java.srcDirs = []
test.resources.srcDirs = ['src/testResources']
}
configurations {
// Keep slf4j-api out of the runtime classpath (it is declared `compileOnly` below)
// see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies
runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api'
}
dependencies {
// compile-time only: not added to this module's runtime classpath
compileOnly project(':nextflow')
compileOnly 'org.slf4j:slf4j-api:2.0.17'
compileOnly 'org.pf4j:pf4j:3.14.1'
api ('javax.xml.bind:jaxb-api:2.4.0-b180830.0359')
// AWS SDK v2 modules -- all pinned to the same version
api ('software.amazon.awssdk:s3:2.33.2')
api ('software.amazon.awssdk:ec2:2.33.2')
api ('software.amazon.awssdk:batch:2.33.2')
api ('software.amazon.awssdk:iam:2.33.2')
api ('software.amazon.awssdk:ecs:2.33.2')
api ('software.amazon.awssdk:cloudwatchlogs:2.33.2')
api ('software.amazon.awssdk:codecommit:2.33.2')
api ('software.amazon.awssdk:sts:2.33.2')
api ('software.amazon.awssdk:ses:2.33.2')
api ('software.amazon.awssdk:sso:2.33.2')
api ('software.amazon.awssdk:ssooidc:2.33.2')
api ('software.amazon.awssdk:s3-transfer-manager:2.33.2')
api ('software.amazon.awssdk:apache-client:2.33.2')
api ('software.amazon.awssdk:aws-crt-client:2.33.2')
// address security vulnerabilities
implementation 'io.netty:netty-common:4.1.132.Final'
implementation 'io.netty:netty-handler:4.1.132.Final'
implementation 'io.netty:netty-codec-http2:4.1.132.Final'
// test-only dependencies, including the test fixtures published by the `:nextflow` project
testImplementation(testFixtures(project(":nextflow")))
testImplementation project(':nextflow')
testImplementation "org.apache.groovy:groovy:4.0.31"
testImplementation "org.apache.groovy:groovy-nio:4.0.31"
}

View File

@@ -0,0 +1,376 @@
nf-amazon changelog
===================
3.9.0 - 25 Apr 2026
- Add hints process directive for executor-specific scheduling hints (#7034) [406358e03]
3.8.3 - 20 Apr 2026
- Fix inconsistent indentation in nf-amazon (#7047) [df6855d7d]
- Fix S3FileSystemProvider.newInputStream() draining full object on close (#7046) [cf3867604]
- Apply socket timeout to S3 CRT connections (#7024) [6f4a21764]
- Manage AWS SDK exceptions to convert to the appropriate IO exceptions (#6707) [39c755663]
3.8.2 - 7 Apr 2026
- Bump org.apache.groovy from 4.0.30 to 4.0.31 (#6985) [62a391588]
- Bump org.pf4j:pf4j from 3.12.0 to 3.14.1 (#6983) [95aba07a3]
- Bump io.netty:netty-codec-http2 from 4.1.129.Final to 4.1.132.Final (#6981) [d12cdc61e]
3.8.1 - 26 Mar 2026
- Fix AWS Batch machine type trace for new instance families (#6952) [06e78ba0b]
- Fix download of empty files in old minio based S3 transfers (#6944) [ccded1845]
- Fix jackson-databind dependency in nf-amazon (#6941) [321c57f88]
- Fix security vulnerabilities (#6938) [8b1ab40c4]
3.8.0 - 17 Mar 2026
- Add multi-arch support to arch process directive (#6897) [c7ca36902]
3.7.1 - 28 Feb 2026
- Fix S3 lookup unbounded pagination with double call (#6851) [a2e67eb99]
- Fix S3 delete throwing DirectoryNotEmptyException due to eventual consistency (#6833) [f3ac49754]
3.7.0 - 8 Feb 2026
- Fix isCompleted check in getNumSpotInterruptions (#6805) [76558481a]
- Fix AWS Batch spot instance detection (#6722) [29356f60e]
- Fix error when checking whether an S3 bucket exists (#6706) [c1bd552ab]
- Remove isCompleted() from getNumSpotInterruptions (#6729) [24cc59e27]
- Add aws.batch.forceGlacierTransfer config option (#6700) [e3bf3153b]
3.6.0 - 19 Dec 2025
- Add spot interruption tracking to trace records (#6606) [eecd81671]
3.5.0 - 28 Nov 2025
- Optimize exit code handling by relying on scheduler status for successful executions (#6484) [454a2ae85]
3.4.2 - 28 Nov 2025
- Fix incorrect AWS region when specifying a S3 regional endpoint (#6530) [770bdd3eb]
- Fix unordered completed parts in AWS multipart upload (#6560) [89eb70130]
3.4.1 - 22 Oct 2025
- Fix no secrets in AWS Batch jobs (#6499) [c76c32582]
- Simplify S3 configuration options (#6496) [0b7f18049]
3.4.0 - 21 Oct 2025
- Limit S3 client connections when using virtual threads (#6369) [295c60b5c]
- Rename `config.schema` package to `config.spec` (#6485) [ef0d2d601]
3.3.0 - 8 Oct 2025
- Fix SIGTERM forwarding in AWS Batch jobs (#6414) [abbef79b7]
- Limit S3 concurrent downloads to fix Java Heap OOM (#6402) [fd71d0e8c]
3.1.0 - 15 Aug 2025
- Fix AWS transfer manager anonymous fallback (#6296) [ci fast] [ed5c99e1]
- Fix silent failure when downloading a directory with AWS SDK v2 (#6266) [ci fast] [2d76d8f0]
- Unify nf-lang config scopes with runtime classes (#6271) [bfa67ca3]
- Update Azure and AWS deps (#6343) [ci fast] [ff00e2de]
- Bump groovy 4.0.28 (#6304) [ci fast] [a468f8ef]
- Bump netty-codec-http2:4.1.124.Final [7e690b44]
3.0.0 - 6 Jul 2025
- Fix AWS nio tests [ci fast] [069653dd]
- Fix replace List.getFirst with List.get(0) for compatibility [83428ee2]
- Upgrade to AWS Java SDK v2 (#6165) [fc99b447]
- Bump Slf4j version 2.0.17 [93199e09]
2.15.0 - 8 May 2025
- Add verbose AWS Batch job cleanup logging [504bd2df]
- Remove test constructors or mark as TestOnly (#5216) [d4fadd42]
2.14.0 - 23 Apr 2025
- Add support for data and execution lineage (#5715) [20e06da7]
- Manage AWS Batch Unscheduled jobs (#5936) [44abe60c]
- Workflow outputs (third preview) (#5909) [2e2dea42]
- Add support for Fusion Snapshots (#5954) [d7f047f8]
2.13.0 - 17 Mar 2025
- Fix Consider AWS China as custom endpoint (#5840) [232ce9d1]
- Fix Prevent S3 global option when using custom endpoints (#5779) [ed9da469]
- Bump groovy 4.0.26 [f740bc56]
2.12.0 - 12 Feb 2025
- Fix bugs with workflow outputs (#5502) [ab59d30f]
- Fail the run if publish thread pool times out (#5578) [5325e5a6]
- Bump groovy 4.0.25 [19c40a4a]
- Bump io.netty:netty-handler:4.1.118.Final [db4a9037]
2.11.0 - 20 Jan 2025
- Disable AWS SDK v1 warning [ci fast] [cd00a26d]
- Ensure job is killed when exception in task status check (#5561) [9eefd207]
- Bump logback 1.5.13 + slf4j 2.0.16 [cc0163ac]
- Bump groovy 4.0.24 missing deps [40670f7e]
2.10.0 - 3 Dec 2024
- Detecting errors in data unstaging (#5345) [3c8e602d]
- Prevent NPE with null AWS Batch response [12fc1d60]
- Fix Fargate warning on memory check (#5475) [bdf0ad00]
- Bump groovy 4.0.24 [dd71ad31]
- Bump aws sdk 1.12.777 (#5458) [8bad0b4b]
- Bump netty-common to version 4.1.115.Final [d1bbd3d0]
2.9.0 - 2 Oct 2024
- Add Platform workflow prefix in AWS Batch job names (#5318) [e2e test] [42dd4ba8]
- Fix AWS spot attempts with zero value (#5331) [ci fast] [bac2da12]
- Bump groovy 4.0.23 (#5303) [ci fast] [fe3e3ac7]
2.8.0 - 4 Sep 2024
- Disable AWS spot retry (#5215) [f28fcb25]
2.7.0 - 5 Aug 2024
- More robust parsing of shm-size containerOptions (#5177) [b56802a3]
- Fix AWS Cloudwatch access when using custom log group name [30195838]
- Fix Prevent AWS Batch retry the job execution when the container does not exist [4e218f22]
- Fix Invalid AWS Fargate CPUs usage error reporting [d9c50e59]
- Bump amazon sdk to version 1.12.766 [cc6ec314]
- Bump pf4j to version 3.12.0 [96117b9a]
2.6.0 - 17 Jun 2024
- Allow requester pays for S3 buckets (#5027) [0070c1b0]
- Fix support for s5cmd 2.2.2 (#5069) [7e78bd4d]
- Bump aws-sdk 1.12.740 [acad2a1f]
2.5.3 - 1 Aug 2024
- More robust parsing of shm-size containerOptions (#5177) [98cf0068]
- Bump amazon sdk to version 1.12.766 [5ce42b79]
- Bump pf4j to version 3.12.0 [1a8f086a]
2.5.2 - 20 May 2024
- Fix nf-amazon plugin dependency [c234b09f]
2.5.1 - 14 May 2024
- Use protected visibility for updateStatus method [6871ba06]
2.5.0 - 13 May 2024
- Add support for Job arrays (#3892) [ca9bc9d4]
- Fix Use fully qualified S3 uris in error message (#4923) [f1cffd1b]
2.4.2 - 15 Apr 2024
- Improve retry logic for AWS Batch executor [62926c28]
- Bump groovy 4.0.21 [9e08390b]
2.4.1 - 10 Mar 2024
- Fix Error while publishing S3 file with blanks [b74c0227]
- Update copyright info [e3089f0e]
- Bump groovy 4.0.19 [854dc1f0]
2.4.0 - 5 Feb 2024
- Remove Glacier auto-retrieval (#4705) [5f0ec50d]
- Bump nextflow 23.12.0-edge as min version [63e83702]
- Bump Groovy 4 (#4443) [9d32503b]
2.3.0 - 20 Dec 2023
- Add AWS_SESSION_TOKEN to Fusion environment (#4581) [552f29b0]
- Add experimental support for Fargate compute type for AWS Batch (#3474) [47cf335b]
2.2.0 - 24 Nov 2023
- Add support for FUSION_AWS_REGION (#4481) [8f8b09fa]
- Fix security vulnerabilities (#4513) [a310c777]
- Fix typos (#4519) [ci fast] [6b1ea726]
- Fix Fusion symlinks when publishing files (#4348) [89f09fe0]
- Bump javax.xml.bind:jaxb-api:2.4.0-b180830.0359
2.1.4-patch3 - 30 Jul 2024
- Bump amazon sdk to version 1.12.766 [189f58ed]
- Bump pf4j to version 3.12.0 [8dfa4076]
2.1.4-patch2 - 11 Jun 2024
- Fix security vulnerabilities (#5057) [6d8765b8]
2.1.4-patch1 - 28 May 2024
- Bump dependency with Nextflow 23.10.2
2.1.4 - 10 Oct 2023
- Improve S3 endpoint validation [2b9ae6aa]
- Add -cloudcache CLI option (#4385) [73fda582]
2.1.3 - 28 Sep 2023
- Fix minor typos in changelogs/source code (#4319) [4ce9f1df]
- Fix List of S3 bucket for custom endpoint [4327fa58]
- Fix support for S3 custom endpoint with anonymous access [03752815]
- Fix Prevent multi attempts to retrieve AWS creds [b30efe36]
2.1.2 - 10 Sep 2023
- Disable staging script for remote work dir (#4282) [80f7cd46]
- Allow setting shell directive when using the trace file. (#4210) [7d6ad624]
- Bump groovy 3.0.19 [cb411208]
2.1.1 - 5 Aug 2023
- Fix glob resolution for remote files [19a72c40]
- Fix Option fixOwnership traverse parent directories [f2a2ea35]
2.1.0 - 22 Jul 2023
- Add support for AWS SSO credentials provider (#4045) [53e33cde]
- Wait for all child processes in nxf_parallel (#4050) [60a5f1a7]
- Ignore accelerator type for AWS Batch (#4043) [263ecca8]
- Bump Groovy 3.0.18 [207eb535]
2.0.1 - 14 Jun 2023
- Add support for AWS SES as mail sending provider [df85d443]
- Fix access to public S3 bucket when no creds are provided (#3992) [cf8ba466]
- Fix S3 path normalization [b75ec444]
2.0.0 - 15 May 2023
- Add fusion.exportStorageCredentials option [ci fast] [acb6aedf]
- Fix AWS SSE env propagation to Fusion [e24608c3]
- Fix string comparison in S3 client (#3875) [ci fast] [9344d294]
- Preview support for virtual threads (#3871) [5c429046]
- Refactor the AWS configuration (#3498) [a74e42d9]
- Rename AmazonS3Client to S3Client [cc59596a]
- Rename com.upplication.s3fs package to nextflow.cloud.aws.nio [a2f3bb24]
- Security fixes [973b7bea]
- Update logging libraries [d7eae86e]
- Bump groovy 3.0.17 [cfe4ba56]
1.16.2 - 15 Apr 2023
- Update plugin deps [83e8fd6a]
1.16.1 - 1 Apr 2023
- Fix NoSuchMethodError String.stripIndent with Java 11 [308eafe6]
1.16.0 - 19 Mar 2023
- Remove unused classes [9fa8d75b]
- Add support for AWS S3 Glacier Retrieval Tier (#3680) [fab6bd5e]
- Bump groovy 3.0.16 [d3ff5dcb]
1.15.0 - 21 Feb 2023
- Improve AWS batch error logging [8f4884c1]
- Remove deprecated buckets field [0a355ac3]
- Add support for fusion tags (#3609) [ci fast] [8385ec22]
- Add nextflow tags to AWS Batch job def [b465ac52]
- Use Fusion as launcher (#3584) [34a27733]
- Bump FUSION_ prefix variables [ci fast] [a7282d64]
- Fix serialization of S3 paths with spaces (#3565) [ce487624]
- Bump groovy 3.0.15 [7a3ebc7d]
1.14.0 - 14 Jan 2023
- Add `schedulingPriority` option to AWS Batch (use with `shareIdentifier`) (#3505) [06960bb2]
- Fix FilePorter concurrency issue (#3511) [11ccfa26]
- Fix support for AWS ACL for Batch #2671 [a9644919]
- Improve container native executor configuration [03126371]
- Improve AWS batch exit code reporting [d1bb2fe2]
- Refactor Fusion config [902e5b34]
- Refactor Fusion package [52f4c5d5]
- Remove unnecessary Fusion env var [dfa47556]
- Bump groovy 3.0.14 [7c204236]
1.13.0 - 13 Dec 2022
- Add support for AWS Glacier restore [b6110766]
- Add support for S3 storageClass to publishDir [066f9203]
- Fix math overflow when copying large AWS S3 files [f32ea0ba]
- Bump AWS sdk version 1.12.351 [4dd82b66]
- Rewrite fetchIamRole and fetchRegion to use AWS SDK (#3425) [ci skip] [e350f319]
- Bump nf-amazon@1.13.0 [ccaab713]
1.12.0 - 21 Nov 2022
- Improve S3 thread pool config [41021cbc]
1.11.0 - 3 Oct 2022
- Add support for custom S3 content type [02afa332] <Paolo Di Tommaso>
- Get rid of file name rolling for report files [a762ed59] <Paolo Di Tommaso>
1.10.7 - 28 Sep 2022
- Fix Issue copying file bigger than 5gb to S3 [18fd9a44]
1.10.6 - 26 Sep 2022
- Add tags propagation to AWS Batch [d64eeffc]
1.10.5 - 20 Sep 2022
- Fix AWS S3 copy object [b3b90d23]
1.10.4 - 13 Sep 2022
- Add STS library to enable use of IRSA in EKS cluster [62df42c3]
1.10.3 - 10 Sep 2022
- S3 min upload size 5MB [9926d15d]
- Use smaller buffer size for s3 stream uploader [8c643074]
1.10.2 - 7 Sep 2022
- Fix thread pool race condition on shutdown [8d2b0587]
- Fix Intermediate multipart upload requires a minimum size (#3193) [0b66aed6]
1.10.1 - 1 Sep 2022
- Add support for label/tags (#2853) [5d0b7c35]
- Add fusion support to local executor [17160bb0]
- Improve fusion env handling [10f35b60]
- Get rid of remote bin dir [6cfb51e7]
1.10.0 - 11 Aug 2022
- Improve S3 copy via xfer manager [02d2beae]
- Add experimental fusion support [1854f1f2]
- Increase S3 upload chunk size to 100 MB [9c94a080]
- Add support for AWS Batch logs group (#3092) [4ef043ac]
- Fix queueSize setting is not honoured by AWS Batch executor (#3093) [d07bb52b]
- Add share identifier to Aws Batch (#3089) [c0253aba]
1.9.0 - 1 Aug 2022
- Fix Unable to disable scratch attribute with AWS Batch [1770f73a]
- Fix NPE while setting S3 ObjectMetadata #3031 [d6163431] <Jorge Aguilera>
- Fix Unable to retrieve AWS batch instance type #1658 [3c4d4d3b] <Paolo Di Tommaso>
- Fix AWS Batch job definition conflict (#3048) [e5084418] <Paolo Di Tommaso>
- Improve S3 file upload/download via Transfer manager [7e8d2a5a] [b7bf9fe5] <Jorge Aguilera>
1.8.1 - 13 Jul 2022
- Fix Exception when setting AWS Batch containerOptions #3019 [89312ad8] <Paolo Di Tommaso>
- Add docs aws.client.s3PathStyleAccess config (#3000) [20005500] <Abhinav Sharma>
1.8.0 - 25 May 2022
- Add support for custom KMS keys
- Add support for virtual file system move operation [8c0ddfd5]
1.7.2 - 15 Apr 2022
- Fix Aws Batch retry policy on spot reclaim [d855f0d9]
1.7.1 - 23 Apr 2022
- Add config option `aws.client.anonymous` to allow the access of public buckets
- Add config option `aws.client.debug`
- Fix SS3 storage encryption flag for Batch submit job
- Change upload default chunk size to 20MB
1.7.0 - 6 Apr 2022
- Add native handling for spot instance interruptions
- Add config option `aws.batch.maxSpotAttempts`
- Add fetching container reason attribute on Batch job failure
1.6.0 - 27 Mar 2022
- Handle AWS Spot interruptions with automatic job retry
- Refactor AWS Batch job unique token generation to prevent
"vCPUs and Memory Values Not Overridden" error see
https://github.com/nextflow-io/nextflow/issues/2561
1.5.2 - 25 Feb 2022
- Prevent S3 tagging with empty list
1.5.1 - 19 Feb 2022
- Fix Batch Job name exceed max allowed size
1.5.0 - 21 Jan 2022
- Add support for secrets management to AWS Batch executor
- Enable parallel s3 downloads by default
1.4.0 - 20 Dec 2021
- Move s3fs client implementation into nf-amazon module
- Bump AWS sdk 1.12.129
1.3.4 - 29 Nov 2021
- Bump s3fs version 1.1.3
- Fix S3 file exists check on provider not supporting Get object ACL
1.3.3 - 22 Nov 2021
- Fix Missing AWS SSE encryption for begin and exit task files #2452
1.3.2 - 18 Nov 2021
- Bump s3fs version 1.1.2 (fix issue propagating KMS key while copying S3 file across encrypted buckets)
1.3.1 - 11 Nov 2021
- Use ResourceRequirement instead of deprecated APIs for cpus and mem requirement
1.3.0 - 28 Oct 2021
- Improve failed task error reporting fetching logs from CloudWatch
1.2.2 - 19 Oct 2021
- Fix issue evaluating dynamic errorStrategy [8c6a5a6] [ce4d491]
1.2.1 - 12 Oct 2021
- Add retry on AWS Job definition not-found error [452cae5d8]
1.2.0 - 6 Aug 2021
- Use AWS cli standard retry mode by default [f2f1fdea4]
- Fix orphaned AWS Batch jobs after pipeline interruption #2169 [65e2a4d05]
- Strengthen AWS Batch task handler [eb1aff275]

View File

@@ -0,0 +1,41 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws
import nextflow.cloud.aws.nio.S3FileSystemProvider
import groovy.transform.CompileStatic
import nextflow.file.FileHelper
import nextflow.plugin.BasePlugin
import org.pf4j.PluginWrapper
/**
 * Entry point of the Nextflow plugin providing the Amazon extensions.
 *
 * On start-up it makes sure the {@link S3FileSystemProvider} is registered
 * through {@link FileHelper#getOrInstallProvider}.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
class AmazonPlugin extends BasePlugin {

    /**
     * @param wrapper The pf4j wrapper handed over to the base plugin
     */
    AmazonPlugin(PluginWrapper wrapper) {
        super(wrapper)
    }

    /**
     * Runs the base start-up logic, then installs the S3 file system provider.
     */
    @Override
    void start() {
        super.start()
        final providerType = S3FileSystemProvider
        FileHelper.getOrInstallProvider(providerType)
    }
}

View File

@@ -0,0 +1,324 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws
import nextflow.cloud.aws.nio.util.S3AsyncClientConfiguration
import nextflow.cloud.aws.nio.util.S3SyncClientConfiguration
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials
import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration
import software.amazon.awssdk.core.exception.SdkClientException
import software.amazon.awssdk.http.SdkHttpClient
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient
import software.amazon.awssdk.services.ec2.Ec2Client
import software.amazon.awssdk.services.ecs.EcsClient
import software.amazon.awssdk.services.s3.S3AsyncClient
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.S3Configuration
import software.amazon.awssdk.services.s3.S3CrtAsyncClientBuilder
import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration
import software.amazon.awssdk.services.sts.StsClient
import software.amazon.awssdk.services.sts.model.GetCallerIdentityRequest
import software.amazon.awssdk.services.sts.model.StsException
import groovy.transform.CompileStatic
import groovy.transform.Memoized
import groovy.util.logging.Slf4j
import nextflow.SysEnv
import nextflow.cloud.aws.config.AwsConfig
import nextflow.exception.AbortOperationException
/**
* Implement a factory class for AWS client objects
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@CompileStatic
class AwsClientFactory {
private AwsConfig config
/**
* The AWS access key credentials (optional)
*/
private String accessKey
/**
* The AWS secret key credentials (optional)
*/
private String secretKey
/**
* The AWS region eg. {@code eu-west-1}. If it's not specified the current region is retrieved from
* the EC2 instance metadata
*/
private String region
private String profile
/**
* Initialise the Amazon cloud driver with default (empty) parameters.
*
* Delegates to the {@code (AwsConfig, String)} constructor using an {@link AwsConfig}
* created from an empty map and no explicit region.
*/
AwsClientFactory() {
this(new AwsConfig(Collections.emptyMap()))
}
/**
 * Creates a factory for the given AWS configuration.
 *
 * The access/secret keys are retained only when both are set in the configuration.
 * The profile is taken from the configuration, then from the {@code AWS_PROFILE} and
 * {@code AWS_DEFAULT_PROFILE} environment variables. The region is taken from the
 * {@code region} argument, then the configuration, then the {@code AWS_REGION} and
 * {@code AWS_DEFAULT_REGION} environment variables, and finally from {@link #fetchRegion()}.
 *
 * @param config The AWS configuration object
 * @param region An optional region overriding any other region setting
 * @throws AbortOperationException when no region can be determined
 */
AwsClientFactory(AwsConfig config, String region=null) {
    this.config = config

    // static credentials are used only when both halves are provided
    final boolean hasStaticKeys = config.accessKey && config.secretKey
    if( hasStaticKeys ) {
        this.accessKey = config.accessKey
        this.secretKey = config.secretKey
    }

    // -- the required profile, if any
    String selectedProfile = config.profile
    if( !selectedProfile )
        selectedProfile = SysEnv.get('AWS_PROFILE')
    if( !selectedProfile )
        selectedProfile = SysEnv.get('AWS_DEFAULT_PROFILE')
    this.profile = selectedProfile

    // -- get the aws default region, each fallback is consulted only when the previous one is empty
    String selectedRegion = region
    if( !selectedRegion )
        selectedRegion = config.region
    if( !selectedRegion )
        selectedRegion = SysEnv.get('AWS_REGION')
    if( !selectedRegion )
        selectedRegion = SysEnv.get('AWS_DEFAULT_REGION')
    if( !selectedRegion )
        selectedRegion = fetchRegion()
    this.region = selectedRegion

    if( !this.region )
        throw new AbortOperationException('Missing AWS region -- Make sure to define in your system environment the variable `AWS_DEFAULT_REGION`')
}
/** @return the configured AWS access key, or {@code null} when none was retained */
String accessKey() {
    return this.accessKey
}

/** @return the configured AWS secret key, or {@code null} when none was retained */
String secretKey() {
    return this.secretKey
}

/** @return the AWS region resolved by the constructor */
String region() {
    return this.region
}

/** @return the AWS profile name, or {@code null} when none was resolved */
String profile() {
    return this.profile
}
/**
 * Retrieve the ARN of the identity associated with the current AWS credentials
 * by querying the STS {@code GetCallerIdentity} API.
 * See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
 *
 * The STS client created for the lookup is always closed before returning,
 * so that its underlying resources are not leaked.
 *
 * @return
 *      The ARN reported by STS for the caller identity, or {@code null} when
 *      the STS service reports an error
 */
protected String fetchIamRole() {
    try {
        final stsClient = StsClient.create()
        try {
            return stsClient.getCallerIdentity(GetCallerIdentityRequest.builder().build() as GetCallerIdentityRequest).arn()
        }
        finally {
            // release the client resources once the lookup is done
            stsClient.close()
        }
    }
    catch (StsException e) {
        log.trace "Unable to fetch IAM credentials -- Cause: ${e.message}"
        return null
    }
}
/**
 * Look up the AWS region using the {@link InstanceProfileRegionProvider},
 * i.e. from the EC2 instance metadata.
 * See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
 *
 * @return
 *      The region identifier eg. {@code eu-west-1}, or {@code null} when the
 *      provider fails with a {@link SdkClientException}
 */
private String fetchRegion() {
    try {
        final provider = new InstanceProfileRegionProvider()
        final detected = provider.getRegion()
        return detected.id()
    }
    catch (SdkClientException e) {
        log.debug("Cannot fetch AWS region", e)
        return null
    }
}
/**
 * Convert a region identifier into the matching {@link Region} object.
 *
 * @param region An AWS region string identifier eg. {@code eu-west-1}
 * @return The {@link Region} for the given identifier
 * @throws IllegalArgumentException when no region object is obtained
 */
private Region getRegionObj(String region) {
    final resolved = Region.of(region)
    if( resolved )
        return resolved
    throw new IllegalArgumentException("Not a valid AWS region name: $region")
}
/**
 * Builds an {@link Ec2Client} for the current region and credentials.
 *
 * Unlike the memoized client getters of this class, this method is not cached:
 * every invocation builds a new client.
 *
 * @return
 *      An {@link Ec2Client} instance
 */
synchronized Ec2Client getEc2Client() {
    final clientBuilder = Ec2Client.builder()
    clientBuilder.region(getRegionObj(region))
    clientBuilder.credentialsProvider(getCredentialsProvider0())
    return clientBuilder.build()
}
/**
 * Provides the {@link BatchClient} for the current region and credentials.
 * The result is cached through {@code @Memoized}.
 *
 * @return
 *      A {@link BatchClient} instance
 */
@Memoized
BatchClient getBatchClient() {
    final clientBuilder = BatchClient.builder()
    clientBuilder.region(getRegionObj(region))
    clientBuilder.credentialsProvider(getCredentialsProvider0())
    return clientBuilder.build()
}
/**
 * Provides the {@link EcsClient} for the current region and credentials.
 * The result is cached through {@code @Memoized}.
 *
 * @return
 *      An {@link EcsClient} instance
 */
@Memoized
EcsClient getEcsClient() {
    final clientBuilder = EcsClient.builder()
    clientBuilder.region(getRegionObj(region))
    clientBuilder.credentialsProvider(getCredentialsProvider0())
    return clientBuilder.build()
}
/**
 * Provides the {@link CloudWatchLogsClient} for the current region and credentials.
 * The result is cached through {@code @Memoized}.
 *
 * @return
 *      A {@link CloudWatchLogsClient} instance
 */
@Memoized
CloudWatchLogsClient getLogsClient() {
    final clientBuilder = CloudWatchLogsClient.builder()
    clientBuilder.region(getRegionObj(region))
    clientBuilder.credentialsProvider(getCredentialsProvider0())
    return clientBuilder.build()
}
/**
 * Builds a synchronous {@link S3Client}.
 *
 * @param s3ClientConfig Supplies the optional HTTP client builder and client override configuration
 * @param global When {@code true} enables cross-region access and multi-region support
 * @return A new {@link S3Client} instance
 */
S3Client getS3Client(S3SyncClientConfiguration s3ClientConfig, boolean global = false) {
    final SdkHttpClient.Builder httpBuilder = s3ClientConfig.getHttpClientBuilder()
    final ClientOverrideConfiguration overrides = s3ClientConfig.getClientOverrideConfiguration()

    final s3Builder = S3Client.builder()
    s3Builder.crossRegionAccessEnabled(global)
    s3Builder.credentialsProvider(getS3CredentialsProvider())
    final serviceConfig = S3Configuration.builder()
            .pathStyleAccessEnabled(config.s3Config.pathStyleAccess)
            .multiRegionEnabled(global)
            .build()
    s3Builder.serviceConfiguration(serviceConfig)

    final customEndpoint = config.s3Config.endpoint
    if( customEndpoint )
        s3Builder.endpointOverride(URI.create(customEndpoint))

    // AWS SDK v2 region must be always set, even when endpoint is overridden
    s3Builder.region(getRegionObj(region))

    // optional settings are applied only when provided
    if( httpBuilder != null )
        s3Builder.httpClientBuilder(httpBuilder)
    if( overrides != null )
        s3Builder.overrideConfiguration(overrides)

    return s3Builder.build()
}
/**
 * Builds a CRT-based {@link S3AsyncClient}.
 *
 * Each optional setting exposed by {@code s3ClientConfig} (retry, HTTP, multipart,
 * target throughput, native memory limit, max concurrency) is applied only when
 * it is not {@code null}.
 *
 * @param s3ClientConfig Supplies the optional CRT client settings
 * @param global When {@code true} enables cross-region access
 * @return A new {@link S3AsyncClient} instance
 */
S3AsyncClient getS3AsyncClient(S3AsyncClientConfiguration s3ClientConfig, boolean global = false) {
    final crtBuilder = S3AsyncClient.crtBuilder()
    crtBuilder.crossRegionAccessEnabled(global)
    crtBuilder.credentialsProvider(getS3CredentialsProvider())
    crtBuilder.forcePathStyle(config.s3Config.pathStyleAccess)
    crtBuilder.region(getRegionObj(region))

    final customEndpoint = config.s3Config.endpoint
    if( customEndpoint )
        crtBuilder.endpointOverride(URI.create(customEndpoint))

    final retrySettings = s3ClientConfig.getCrtRetryConfiguration()
    if( retrySettings != null )
        crtBuilder.retryConfiguration(retrySettings)

    final httpSettings = s3ClientConfig.getCrtHttpConfiguration()
    if( httpSettings != null )
        crtBuilder.httpConfiguration(httpSettings)

    final multipartSettings = s3ClientConfig.getMultipartConfiguration()
    if( multipartSettings != null )
        setMultipartConfiguration(multipartSettings, crtBuilder)

    final targetGbps = s3ClientConfig.getTargetThroughputInGbps()
    if( targetGbps != null )
        crtBuilder.targetThroughputInGbps(targetGbps)

    final memoryLimit = s3ClientConfig.getMaxNativeMemoryInBytes()
    if( memoryLimit != null )
        crtBuilder.maxNativeMemoryLimitInBytes(memoryLimit)

    final concurrency = s3ClientConfig.getMaxConcurrency()
    if( concurrency != null )
        crtBuilder.maxConcurrency(concurrency)

    return crtBuilder.build()
}
/**
 * Returns an AwsCredentialsProvider for S3 clients.
 *
 * This method wraps the same AWS credentials used for other clients, but ensures proper handling of anonymous S3 access.
 * If the 'anonymous' flag is set in Nextflow's AWS S3 configuration, or if no credentials are resolved by other providers,
 * an AnonymousCredentialsProvider instance is returned.
 *
 * Prior to AWS SDK v2, the S3CredentialsProvider automatically managed fallback to anonymous access when no credentials were found.
 * However, due to a limitation in the AWS SDK v2 CRT Async S3 client (see https://github.com/aws/aws-sdk-java-v2/issues/5810),
 * anonymous credentials only work when explicitly configured via AnonymousCredentialsProvider.
 * Custom credential providers or provider chains that resolve to anonymous credentials are not handled correctly by the CRT client.
 *
 * To work around this, this method explicitly checks whether credentials can be resolved.
 * If no credentials are found, it returns an AnonymousCredentialsProvider; otherwise, it returns the resolved provider.
 * The failure that triggered the fallback is reported in the debug log, so that a broken
 * credentials setup can be told apart from a genuinely anonymous one.
 *
 * @return an AwsCredentialsProvider instance, falling back to anonymous if needed.
 */
private AwsCredentialsProvider getS3CredentialsProvider() {
    if ( config.s3Config.anonymous )
        return AnonymousCredentialsProvider.create()
    final provider = getCredentialsProvider0()
    try {
        // only used to probe whether credentials are available; the result is discarded
        provider.resolveCredentials()
    }
    catch (Exception e) {
        // best-effort fallback: keep the cause in the log instead of silently dropping it
        log.debug("No AWS credentials available - falling back to anonymous access", e)
        return AnonymousCredentialsProvider.create()
    }
    return provider
}
/**
 * Applies the multipart settings to the CRT async client builder.
 * A setting whose value is {@code null} is left untouched on the builder.
 *
 * @param multipartConfig the multipart settings to read
 * @param builder the builder receiving the non-null settings
 */
private void setMultipartConfiguration(MultipartConfiguration multipartConfig, S3CrtAsyncClientBuilder builder) {
    final minPartSize = multipartConfig.minimumPartSizeInBytes()
    if( minPartSize != null )
        builder.minimumPartSizeInBytes(minPartSize)

    final threshold = multipartConfig.thresholdInBytes()
    if( threshold != null )
        builder.thresholdInBytes(threshold)
}
/**
 * Selects the credentials provider by precedence: a static provider when both
 * the access and secret keys are set, then a profile based provider when a profile
 * name is set, otherwise the SDK default provider.
 *
 * @return the selected {@link AwsCredentialsProvider}
 */
protected AwsCredentialsProvider getCredentialsProvider0() {
    final boolean hasStaticKeys = accessKey && secretKey
    if( hasStaticKeys ) {
        final credentials = AwsBasicCredentials.create(accessKey, secretKey)
        return StaticCredentialsProvider.create(credentials)
    }
    if( !profile )
        return DefaultCredentialsProvider.create()
    return ProfileCredentialsProvider.builder().profileName(profile).build()
}
}

View File

@@ -0,0 +1,437 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.Path
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeoutException
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.batch.model.BatchException
import software.amazon.awssdk.services.ecs.model.AccessDeniedException
import software.amazon.awssdk.services.cloudwatchlogs.model.ResourceNotFoundException
import groovy.transform.CompileStatic
import groovy.transform.PackageScope
import groovy.util.logging.Slf4j
import nextflow.cloud.aws.AwsClientFactory
import nextflow.cloud.aws.config.AwsConfig
import nextflow.cloud.aws.nio.S3Path
import nextflow.cloud.types.CloudMachineInfo
import nextflow.exception.AbortOperationException
import nextflow.executor.Executor
import nextflow.executor.TaskArrayExecutor
import nextflow.extension.FilesEx
import nextflow.fusion.FusionHelper
import nextflow.processor.ParallelPollingMonitor
import nextflow.processor.TaskHandler
import nextflow.processor.TaskMonitor
import nextflow.processor.TaskRun
import nextflow.util.Duration
import nextflow.util.Escape
import nextflow.util.RateUnit
import nextflow.util.ServiceName
import nextflow.util.ThreadPoolHelper
import nextflow.util.ThrottlingExecutor
import org.pf4j.ExtensionPoint
/**
* AWS Batch executor
* https://aws.amazon.com/batch/
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@ServiceName('awsbatch')
@CompileStatic
class AwsBatchExecutor extends Executor implements ExtensionPoint, TaskArrayExecutor {
/**
 * Proxy used to throttle the requests sent through the AWS Batch client
 */
@PackageScope
private AwsBatchProxy client
/** Helper class to resolve Batch related metadata */
private AwsBatchHelper helper
/**
 * Executor service used to throttle service requests; it is also handed to the
 * Batch client proxy and to the task monitor
 */
private ThrottlingExecutor submitter
/**
 * Executor service to throttle cancel requests
 */
private ThrottlingExecutor reaper
/**
 * The path returned when the local `bin` scripts directory is copied to the
 * remote temporary directory; stays {@code null} when no upload takes place
 */
private Path remoteBinDir = null
/** Executor options, created together with the AWS client */
private AwsOptions awsOptions
/** IDs of the jobs for which a deletion has already been requested */
private final Set<String> deletedJobs = new HashSet<>()
/** @return the options object held by this executor ({@code null} until the AWS client is created) */
AwsOptions getAwsOptions() { awsOptions }
/**
 * @return always {@code true}, signalling that containers are managed directly by the AWS Batch service
 */
@Override
final boolean isContainerNative() {
    true
}

/**
 * @return the name of the container engine whose configuration applies to this executor, i.e. {@code docker}
 */
@Override
String containerConfigEngine() {
    'docker'
}

/**
 * @return always {@code true}, signalling that secrets are handled by the executing platform itself
 */
@Override
final boolean isSecretNative() {
    true
}

/**
 * @return the session bucket directory when defined, otherwise the session work directory
 */
@Override
Path getWorkDir() {
    return session.bucketDir ?: session.workDir
}
/**
 * Makes sure the work directory is an S3 path; when it is not, the session is
 * aborted and an {@link AbortOperationException} is thrown.
 */
protected void validateWorkDir() {
    // nothing to report when the work dir is hosted on S3
    if( workDir instanceof S3Path )
        return

    session.abort()
    throw new AbortOperationException("When using `$name` executor an S3 bucket must be provided as working directory using either the `-bucket-dir` or `-work-dir` command line option")
}
/**
 * Warns the user when an {@code env.PATH} setting is found in the configuration,
 * because this executor does not honour it.
 */
protected void validatePathDir() {
    final envPath = session.config.navigate('env.PATH')
    if( !envPath )
        return
    log.warn "Environment PATH defined in config file is ignored by AWS Batch executor"
}
/**
 * Copies the local `bin` scripts directory to the executor temporary directory
 * and records the resulting path in {@code remoteBinDir}.
 *
 * Nothing is done when the session has no `bin` directory, the directory is empty,
 * or the remote bin directory feature is disabled in the session.
 */
protected void uploadBinDir() {
    final boolean uploadRequired = session.binDir && !session.binDir.empty() && !session.disableRemoteBinDir
    if( !uploadRequired )
        return

    final target = getTempDir()
    log.info "Uploading local `bin` scripts folder to ${target.toUriString()}/bin"
    remoteBinDir = FilesEx.copyTo(session.binDir, target)
}
/**
 * Creates the AWS related collaborators of this executor: the throttled Batch
 * client proxy, the metadata helper and the options object.
 */
protected void createAwsClient() {
    // the client factory is driven by the `aws` scope of the session configuration
    final awsConfig = new AwsConfig(session.config.aws as Map)
    final factory = new AwsClientFactory(awsConfig)

    // every Batch request goes through the proxy, which submits it via the throttling executor
    client = new AwsBatchProxy(factory.getBatchClient(), submitter)
    helper = new AwsBatchHelper(client, factory)

    // the options object is created from this executor instance
    awsOptions = new AwsOptions(this)
    log.debug "[AWS BATCH] Executor ${awsOptions.fargateMode ? '(FARGATE mode) ' : ''}options=$awsOptions"
}
/**
 * Initialise the AWS batch executor.
 *
 * After the parent registration, the steps run in this order: work directory
 * validation, `env.PATH` check, upload of the local `bin` directory and finally
 * the creation of the AWS client, helper and options objects.
 */
@Override
protected void register() {
super.register()
// throws when the work dir is not an S3 path, so none of the following steps runs
validateWorkDir()
// only emits a warning
validatePathDir()
// sets `remoteBinDir` before the options object is created by the next step
uploadBinDir()
createAwsClient()
}
/**
 * @return the remote path holding the uploaded `bin` scripts, or {@code null} when nothing was uploaded
 */
@PackageScope
Path getRemoteBinDir() {
    return remoteBinDir
}

/**
 * @return the throttled Batch client proxy, exposed through the {@link BatchClient} type
 */
@PackageScope
BatchClient getClient() {
    return client
}
/**
 * Creates the throttling executors used by this executor and the monitor
 * polling the status of the AWS Batch tasks.
 *
 * @return the {@link ParallelPollingMonitor} instance that handles AWS Batch tasks
 */
@Override
protected TaskMonitor createTaskMonitor() {
    // one throttling executor for the service requests, one for the job cancellations
    submitter = createExecutorService('AWSBatch-executor')
    reaper = createExecutorService('AWSBatch-reaper')

    // monitor settings, falling back to the defaults given here
    final pollEvery = config.getPollInterval(name, Duration.of('10 sec'))
    final dumpEvery = config.getMonitorDumpInterval(name)
    final queueSize = config.getQueueSize(name, 1000)

    final def monitorParams = [
        name: name,
        session: session,
        config: config,
        pollInterval: pollEvery,
        dumpInterval: dumpEvery,
        capacity: queueSize
    ]

    log.debug "Creating parallel monitor for executor '$name' > pollInterval=$pollEvery; dumpInterval=$dumpEvery"
    return new ParallelPollingMonitor(submitter, monitorParams)
}
/**
 * Creates the handler in charge of running the given task with AWS Batch.
 *
 * @param task the {@link TaskRun} instance to be executed; both the task and its work directory must be defined
 * @return a new {@link AwsBatchTaskHandler} bound to the task and to this executor
 */
@Override
TaskHandler createTaskHandler(TaskRun task) {
    assert task
    assert task.workDir

    log.trace "[AWS BATCH] Launching process > ${task.name} -- work folder: ${task.workDirStr}"
    return new AwsBatchTaskHandler(task, this)
}
/** HTTP status codes of a failed Batch request for which the request is retried */
private static final List<Integer> RETRYABLE_STATUS = [429, 500, 502, 503, 504]

/**
 * Creates a {@link ThrottlingExecutor} service to throttle the API requests
 * to the AWS Batch service.
 *
 * A failed request is retried (up to 10 times) when it raised a {@link BatchException}
 * whose error code is `TooManyRequestsException` or whose HTTP status is listed in
 * {@link #RETRYABLE_STATUS}; any other failure aborts the session.
 *
 * NOTE(review): the `name` parameter shadows the executor name, so the `submitRateLimit`
 * setting is looked up using the pool name, while `createTaskMonitor` uses the executor
 * name for its lookups - confirm this is intended.
 *
 * @param name the name assigned to the thread pool
 * @return the configured {@link ThrottlingExecutor}
 */
private ThrottlingExecutor createExecutorService(String name) {
    // size of the queue holding the pending requests
    final qs = 5_000
    final limit = config.getExecConfigProp(name, 'submitRateLimit', '50/s') as String
    final size = Runtime.runtime.availableProcessors() * 5

    // the error details are accessed null-safely: when the exception carries none,
    // the decision falls back to the HTTP status code instead of failing inside the retry predicate
    final opts = new ThrottlingExecutor.Options()
            .retryOn { Throwable t -> t instanceof BatchException && (t.awsErrorDetails()?.errorCode() == 'TooManyRequestsException' || t.statusCode() in RETRYABLE_STATUS) }
            .onFailure { Throwable t -> session?.abort(t) }
            .onRateLimitChange { RateUnit rate -> logRateLimitChange(rate) }
            .withRateLimit(limit)
            .withQueueSize(qs)
            .withPoolSize(size)
            .withKeepAlive(Duration.of('1 min'))
            .withAutoThrottle(true)
            .withMaxRetries(10)
            .withPoolName(name)

    ThrottlingExecutor.create(opts)
}
/**
 * @return whether Fusion is enabled for the current session, as reported by {@link FusionHelper}
 */
@Override
boolean isFusionEnabled() {
    FusionHelper.isFusionEnabled(session)
}

/**
 * Callback invoked by the throttling executors when the rate limit changes;
 * it only logs the new value.
 *
 * @param rate the new rate limit
 */
protected void logRateLimitChange(RateUnit rate) {
    log.debug "New submission rate limit: $rate"
}

/**
 * @return the executor service used to throttle the cancel requests
 */
@PackageScope
ThrottlingExecutor getReaper() {
    return reaper
}
/**
 * Tells whether the deletion of the given job should be carried out, making sure
 * a positive answer is given only once for each job ID.
 *
 * @param jobId the ID of the Batch job to be deleted
 * @return {@code true} the first time the ID is seen, {@code false} on any later invocation
 */
boolean shouldDeleteJob(String jobId) {
    // both the lookup and the insertion happen while holding the monitor: the set is a plain
    // `HashSet`, which must not be read while another thread may be modifying it
    synchronized (deletedJobs) {
        // `add` returns true only when the ID was not yet in the set
        final added = deletedJobs.add(jobId)
        if( added )
            log.debug "[AWS BATCH] cleanup = should delete job $jobId: $added"
        else
            log.debug "[AWS BATCH] cleanup = already deleted job $jobId"
        return added
    }
}
/**
 * Retrieves the cloud machine information for the given queue and task, never
 * propagating a failure to the caller.
 *
 * @param queue the Batch queue name
 * @param taskArn the ARN of the task
 * @return the machine info, or {@code null} when it is not available or the lookup failed
 */
CloudMachineInfo getMachineInfoByQueueAndTaskArn(String queue, String taskArn) {
    CloudMachineInfo info = null
    try {
        info = helper?.getCloudInfoByQueueAndTaskArn(queue, taskArn)
    }
    catch( AccessDeniedException e ) {
        log.warn "Unable to retrieve AWS Batch instance type | ${e.message}"
        // the user is not allowed to access this info, stop asking for it
        awsOptions.fetchInstanceType = false
    }
    catch( Exception e ) {
        log.warn "Unable to retrieve AWS batch instance type for queue=$queue; task=$taskArn | ${e.message}", e
    }
    return info
}
/**
 * Retrieves the CloudWatch log of the given Batch job, using the log group
 * defined in the executor options.
 *
 * @param jobId the Batch job ID
 * @return the log content, or {@code null} when it cannot be retrieved
 */
String getJobOutputStream(String jobId) {
    try {
        return helper.getTaskLogStream(jobId, awsOptions.getLogsGroup())
    }
    catch( ResourceNotFoundException e ) {
        log.debug "Unable to find AWS Cloudwatch logs for Batch Job id=$jobId - ${e.message}"
        return null
    }
    catch( Exception e ) {
        log.debug "Unable to retrieve AWS Cloudwatch logs for Batch Job id=$jobId | ${e.message}", e
        return null
    }
}
/**
 * Stops the executor services: the submitter is halted at once and awaited for
 * up to 5 minutes, then the reaper is shut down and awaited for up to 60 minutes.
 */
@Override
void shutdown() {
    final pending = submitter.shutdownNow()
    if( pending )
        log.warn "Execution interrupted -- cleaning up execution pool"
    submitter.awaitTermination(5, TimeUnit.MINUTES)

    // the reaper comes last, giving the cancel requests the time to complete
    reaper.shutdown()
    final waitMsg = "[AWS BATCH] Waiting jobs reaper to complete (%d jobs to be terminated)"
    final exitMsg = "[AWS BATCH] Exiting before jobs reaper thread pool complete -- Some jobs may not be terminated"
    awaitCompletion(reaper, Duration.of('60min'), waitMsg, exitMsg)
}

/**
 * Waits for the executor completion via {@link ThreadPoolHelper#await}; a timeout
 * is reported as a warning instead of being propagated.
 *
 * @param executor the executor service to wait for
 * @param duration the max time to wait
 * @param waitMsg the message passed to the helper as waiting message
 * @param exitMsg the message passed to the helper as exit message
 */
protected void awaitCompletion(ThrottlingExecutor executor, Duration duration, String waitMsg, String exitMsg) {
    try {
        ThreadPoolHelper.await(executor, duration, waitMsg, exitMsg)
    }
    catch( TimeoutException timeout ) {
        log.warn(timeout.message, timeout)
    }
}
/**
 * @return the name of the environment variable holding the array index
 */
@Override
String getArrayIndexName() {
    return 'AWS_BATCH_JOB_ARRAY_INDEX'
}

/**
 * @return the value of the first array index, i.e. zero
 */
@Override
int getArrayIndexStart() {
    return 0
}

/**
 * @return the ID of an array child, made of the job ID and the index separated by a colon
 */
@Override
String getArrayTaskId(String jobId, int index) {
    "${jobId}:${index}"
}

/**
 * Returns the command launching an array child task: the default command when
 * Fusion is enabled or the work directory is on the default file system, otherwise
 * the escaped S3 based launch command.
 *
 * @param taskDir the task work directory
 */
@Override
String getArrayLaunchCommand(String taskDir) {
    final boolean remoteLaunch = !isFusionEnabled() && !isWorkDirDefaultFS()
    if( remoteLaunch )
        return Escape.cli(getLaunchCommand(taskDir) as String[])
    return TaskArrayExecutor.super.getArrayLaunchCommand(taskDir)
}
/**
 * Builds the command line launching the task script stored in the given S3 work directory.
 *
 * @param s3WorkDir the task work directory
 * @return the `bash -o pipefail -c` invocation wrapping the s5cmd based script when
 *      an s5cmd path is configured, the AWS CLI based one otherwise
 */
List<String> getLaunchCommand(String s3WorkDir) {
    final opts = getAwsOptions()
    String script
    if( opts.s5cmdPath )
        script = s5Cmd(s3WorkDir, opts)
    else
        script = s3Cmd(s3WorkDir, opts)
    return ['bash','-o','pipefail','-c', script.toString()]
}
/**
 * Renders the shell command that launches a task by means of the AWS CLI.
 *
 * The command streams the `TaskRun.CMD_RUN` file found in the work directory into
 * `bash` using `s3 cp`, and uploads the `TaskRun.CMD_LOG` file back to the work
 * directory when the shell exits. The `s3 cp` invocation is extended with `--sse`,
 * `--sse-kms-key-id`, `--debug` and `--request-payer requester` depending on the
 * corresponding options.
 *
 * @param workDir the task work directory, used as prefix of the remote files
 * @param opts the options providing the CLI path and the transfer settings
 * @return the shell command as a string
 */
static String s3Cmd(String workDir, AwsOptions opts) {
final cli = opts.getAwsCli()
final debug = opts.debug ? ' --debug' : ''
final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''
final requesterPays = opts.requesterPays ? ' --request-payer requester' : ''
// base copy command shared by the script download and the log upload
final aws = "$cli s3 cp --only-show-errors${sse}${kms}${debug}${requesterPays}"
/*
* Enhanced signal handling for AWS Batch tasks to fix nested Nextflow execution issues.
* This implementation addresses the problem of proper signal forwarding when Nextflow
* processes are executed within AWS Batch containers.
*
* References: https://github.com/nextflow-io/nextflow/pull/6414
*
* Trap command breakdown:
*
* 1. TERM signal trap: `trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM`
* - Captures SIGTERM signals sent to the parent shell process
* - Conditionally forwards the TERM signal to the background bash process (stored in $pid)
* - The `[[ -n \\\$pid ]]` test ensures we only attempt to kill if $pid is set and non-empty
* - This prevents attempts to kill process ID 0 or empty values, which could cause unintended behavior
* - Essential for proper cleanup when AWS Batch terminates jobs or when users cancel workflows
*
* 2. EXIT signal trap: `trap \"{ ret=\$?; $aws ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT`
* - Executes cleanup actions when the shell process exits (normal or abnormal termination)
* - Captures the exit status ($?) of the last executed command before cleanup
* - Uploads the command log file to S3 for debugging and monitoring purposes
* - Uses `||true` to prevent the trap from failing if S3 upload fails (ensures exit code preservation)
* - Preserves and returns the original exit status to maintain proper error propagation
*
* 3. Background execution pattern: `bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid`
* - Runs the actual task command in background (&) to allow signal handling
* - Redirects both stdout and stderr (2>&1) to process substitution for real-time logging
* - Uses `tee` to simultaneously write logs to file and display to console
* - Stores the background process ID in $pid for signal forwarding
* - `wait $pid` ensures the parent shell waits for task completion and returns proper exit code
* - This pattern allows the parent shell to remain responsive to signals while task executes
*/
final cmd = "trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM; trap \"{ ret=\$?; $aws ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT; $aws ${workDir}/${TaskRun.CMD_RUN} - | bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid"
return cmd
}
/**
 * Renders the shell command that launches a task by means of s5cmd.
 *
 * The command streams the `TaskRun.CMD_RUN` file found in the work directory into
 * `bash` using `cat`, and uploads the `TaskRun.CMD_LOG` file back to the work
 * directory using `cp` when the shell exits. The `--sse`, `--sse-kms-key-id` and
 * `--request-payer requester` flags are added to the `cp` upload only.
 *
 * @param workDir the task work directory, used as prefix of the remote files
 * @param opts the options providing the s5cmd path and the transfer settings
 * @return the shell command as a string
 */
static String s5Cmd(String workDir, AwsOptions opts) {
final cli = opts.getS5cmdPath()
final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''
final requesterPays = opts.requesterPays ? ' --request-payer requester' : ''
/*
* Enhanced signal handling for AWS Batch tasks using s5cmd (high-performance S3 client).
* This implementation mirrors the s3Cmd method but uses s5cmd instead of aws-cli for
* improved S3 transfer performance.
*
* References: https://github.com/nextflow-io/nextflow/pull/6414
*
* The trap commands follow the same pattern as s3Cmd method:
*
* 1. TERM signal trap: `trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM`
* - Ensures proper signal forwarding to background processes when SIGTERM is received
* - Critical for handling AWS Batch job termination and user-initiated cancellations
*
* 2. EXIT signal trap: `trap \"{ ret=\$?; $cli cp${sse}${kms}${requesterPays} ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT`
* - Performs cleanup by uploading task logs using s5cmd instead of aws-cli
* - Maintains exit status preservation for proper error reporting
*
* 3. Background execution with s5cmd: `$cli cat ${workDir}/${TaskRun.CMD_RUN} | bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid`
* - Uses s5cmd to stream the task script directly into bash execution
* - Maintains the same signal-responsive background execution pattern
* - Provides real-time logging while allowing proper signal handling
*/
final cmd = "trap \"[[ -n \\\$pid ]] && kill -TERM \\\$pid\" TERM; trap \"{ ret=\$?; $cli cp${sse}${kms}${requesterPays} ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT; $cli cat ${workDir}/${TaskRun.CMD_RUN} | bash > >(tee ${TaskRun.CMD_LOG}) 2>&1 & pid=\$!; wait \$pid"
return cmd
}
}

View File

@@ -0,0 +1,173 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.Path
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.cloud.aws.util.S3BashLib
import nextflow.executor.SimpleFileCopyStrategy
import nextflow.processor.TaskBean
import nextflow.util.Escape
/**
* Defines the script operations used to handle files when running tasks with the AWS Batch executor
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@CompileStatic
class AwsBatchFileCopyStrategy extends SimpleFileCopyStrategy {
/** Executor options used when rendering the S3 related commands */
private AwsOptions opts
/** The environment read from the task bean when the strategy is created */
private Map<String,String> environment
/**
 * @param task the bean of the task for which the scripts are rendered
 * @param opts the AWS executor options
 */
AwsBatchFileCopyStrategy(TaskBean task, AwsOptions opts ) {
super(task)
this.opts = opts
this.environment = task.environment
}
/**
 * @return the Bash snippet rendered by {@link S3BashLib} for the current options,
 *      to be included in the task wrapper before the task starts
 */
String getBeforeStartScript() {
    return S3BashLib.script(opts)
}
/**
 * Renders the snippet defining the task environment. Any `PATH` entry is dropped
 * from the given variables; when a remote bin directory is defined, the snippet
 * starts by downloading it into `$PWD/nextflow-bin` and prepending it to the `PATH`.
 *
 * @param environment the environment variables to render
 * @param container must be {@code false}, container mode is not supported
 * @return the environment snippet
 * @throws IllegalArgumentException when {@code container} is {@code true}
 */
@Override
String getEnvScript(Map environment, boolean container) {
    if( container )
        throw new IllegalArgumentException("Parameter `container` not supported by ${this.class.simpleName}")

    // work on a copy, leaving out any externally provided PATH
    final vars = environment ? new LinkedHashMap<String,String>(environment) : Collections.<String,String>emptyMap()
    if( vars.containsKey('PATH') )
        vars.remove('PATH')

    final script = new StringBuilder()
    // make the scripts of the remote bin directory available in the task PATH
    if( opts.remoteBinDir ) {
        final String fetchBin = "${opts.getAwsCli()} s3 cp --recursive --only-show-errors s3:/${opts.remoteBinDir} \$PWD/nextflow-bin\n"
        script.append(fetchBin)
        script.append("chmod +x \$PWD/nextflow-bin/* || true\n")
        script.append("export PATH=\$PWD/nextflow-bin:\$PATH\n")
    }

    // the remaining variables are rendered by the parent strategy
    final rendered = super.getEnvScript(vars,false)
    if( rendered )
        script.append(rendered)
    return script.toString()
}
/**
 * Wraps the staging commands rendered by the parent strategy between the
 * initialisation of the `downloads` array and the `nxf_parallel` invocation running it.
 *
 * @param inputFiles the input files to stage, by target name
 * @return the staging snippet
 */
@Override
String getStageInputFilesScript(Map<String,Path> inputFiles) {
    final staging = super.getStageInputFilesScript(inputFiles) + '\n'
    return 'downloads=(true)\n' + staging + 'nxf_parallel "${downloads[@]}"\n'
}
/**
 * Renders the statement appending the download of one input file to the
 * `downloads` array. The download is wrapped by `nxf_cp_retry` when more than one
 * transfer attempt is configured and no retry mode is set.
 *
 * @param path the remote file to download
 * @param targetName the name of the file in the task directory
 * @return the Bash statement
 */
@Override
String stageInputFile( Path path, String targetName ) {
    final boolean wrapRetry = opts.maxTransferAttempts > 1 && !opts.retryMode
    final download = wrapRetry ? 'nxf_cp_retry nxf_s3_download' : 'nxf_s3_download'
    return "downloads+=(\"${download} s3:/${Escape.path(path)} ${Escape.path(targetName)}\")"
}
/**
 * Renders the Bash snippet uploading the task output files to the target directory.
 *
 * The snippet lists the files matching the (escaped) output patterns, removes the
 * duplicates, and queues one `nxf_s3_upload` command for each entry; the queued
 * commands are finally run by `nxf_parallel`.
 *
 * @param outputFiles the names or glob patterns of the output files
 * @param targetDir the remote directory receiving the files
 * @return the upload snippet, or {@code null} when there is no pattern to handle
 */
@Override
String getUnstageOutputFilesScript(List<String> outputFiles, Path targetDir) {
final patterns = normalizeGlobStarPaths(outputFiles)
// create a bash script that will copy the out file to the working directory
log.trace "[AWS BATCH] Unstaging file path: $patterns"
if( !patterns )
return null
// escape each pattern before it is interpolated in the `ls` command below
final escape = new ArrayList(outputFiles.size())
for( String it : patterns )
escape.add( Escape.path(it) )
// IFS is set to newline while looping, so that the `ls` output is split by line only
return """\
uploads=()
IFS=\$'\\n'
for name in \$(eval "ls -1d ${escape.join(' ')}" | sort | uniq); do
uploads+=("nxf_s3_upload '\$name' s3:/${Escape.path(targetDir)}")
done
unset IFS
nxf_parallel "\${uploads[@]}"
""".stripIndent(true)
}
/**
 * @return the command creating the given remote file by uploading the `start` text read from stdin
 */
@Override
String touchFile( Path file ) {
    final target = Escape.path(file)
    return "echo start | nxf_s3_upload - s3:/${target}"
}

/**
 * @return the escaped file name (last path component) of the given path
 */
@Override
String fileStr( Path path ) {
    final fileName = path.getFileName()
    return Escape.path(fileName)
}

/**
 * @return the command uploading the named local file to the parent directory of the target path
 */
@Override
String copyFile( String name, Path target ) {
    final parentDir = Escape.path(target.getParent())
    return "nxf_s3_upload ${Escape.path(name)} s3:/${parentDir}"
}

/**
 * @return the command uploading the source file to the given target path
 */
static String uploadCmd( String source, Path target ) {
    final destination = Escape.path(target)
    return "nxf_s3_upload ${Escape.path(source)} s3:/${destination}"
}

/**
 * @return the pipe fragment uploading what is read from stdin to the given remote path;
 *      an upload failure is ignored by the fragment
 */
String exitFile( Path path ) {
    final target = Escape.path(path)
    return "| nxf_s3_upload - s3:/${target} || true"
}

/**
 * @return the redirection fragment reading the stdin from the file having the name of the given path
 */
@Override
String pipeInputFile( Path path ) {
    final fileName = Escape.path(path.getFileName())
    return " < ${fileName}"
}
}

View File

@@ -0,0 +1,239 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import groovy.transform.CompileStatic
import groovy.transform.Memoized
import groovy.util.logging.Slf4j
import nextflow.cloud.aws.AwsClientFactory
import nextflow.cloud.types.CloudMachineInfo
import nextflow.cloud.types.PriceModel
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.batch.model.DescribeComputeEnvironmentsRequest
import software.amazon.awssdk.services.batch.model.DescribeJobQueuesRequest
import software.amazon.awssdk.services.batch.model.DescribeJobsRequest
import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient
import software.amazon.awssdk.services.cloudwatchlogs.model.GetLogEventsRequest
import software.amazon.awssdk.services.cloudwatchlogs.model.OutputLogEvent
import software.amazon.awssdk.services.ec2.Ec2Client
import software.amazon.awssdk.services.ec2.model.DescribeInstancesRequest
import software.amazon.awssdk.services.ec2.model.Instance
import software.amazon.awssdk.services.ec2.model.InstanceLifecycleType
import software.amazon.awssdk.services.ecs.EcsClient
import software.amazon.awssdk.services.ecs.model.DescribeContainerInstancesRequest
import software.amazon.awssdk.services.ecs.model.DescribeTasksRequest
import software.amazon.awssdk.services.ecs.model.InvalidParameterException
/**
* Helper class to resolve Batch related metadata
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@CompileStatic
class AwsBatchHelper {
/** Factory providing the ECS, EC2 and CloudWatch Logs clients when first needed */
private AwsClientFactory factory
/** Client used for the Batch API requests */
private BatchClient batchClient
/**
 * @param batchClient the client used for the Batch API requests
 * @param factory the factory creating the other AWS clients
 */
AwsBatchHelper(BatchClient batchClient, AwsClientFactory factory) {
this.batchClient = batchClient
this.factory = factory
}
/** @return the ECS client, obtained from the factory and memoized */
@Memoized
private EcsClient getEcsClient() {
    factory.getEcsClient()
}

/** @return the EC2 client, obtained from the factory and memoized */
@Memoized
private Ec2Client getEc2Client() {
    factory.getEc2Client()
}

/** @return the CloudWatch Logs client, obtained from the factory and memoized */
@Memoized
private CloudWatchLogsClient getLogsClient() {
    factory.getLogsClient()
}
/**
 * Resolves the ARNs of the ECS clusters backing the given Batch queue, going
 * through the compute environments of the queue. Results are memoized (up to 100 entries).
 *
 * @param queueName the Batch queue name
 * @return the list of cluster ARNs
 */
@Memoized(maxCacheSize = 100)
private List<String> getClusterArnByBatchQueue(String queueName) {
    return getClusterArnByCompEnvNames(getComputeEnvByQueueName(queueName))
}
/**
 * Retrieves the ECS cluster ARN of each of the given compute environments.
 *
 * @param envNames the compute environment names
 * @return the cluster ARNs reported by the Batch service, or an empty list when
 *      no name is given
 */
private List<String> getClusterArnByCompEnvNames(List<String> envNames) {
    // without names there is nothing to look up: skip the remote call rather than
    // sending a request that carries no compute environment filter
    if( !envNames )
        return new ArrayList<String>()

    final req = DescribeComputeEnvironmentsRequest.builder()
            .computeEnvironments(envNames)
            .build() as DescribeComputeEnvironmentsRequest

    batchClient
            .describeComputeEnvironments(req)
            .computeEnvironments()
            *.ecsClusterArn()
}
/**
 * Collects the compute environments listed in the compute environment order
 * of the given Batch queue.
 *
 * @param queueName the Batch queue name
 * @return the compute environment identifiers, possibly empty
 */
private List<String> getComputeEnvByQueueName(String queueName) {
    final request = DescribeJobQueuesRequest.builder().jobQueues(queueName).build() as DescribeJobQueuesRequest
    final queues = batchClient.describeJobQueues(request).jobQueues()

    final envNames = new ArrayList<String>(10)
    for( final queue : queues ) {
        for( final entry : queue.computeEnvironmentOrder() )
            envNames.add(entry.computeEnvironment())
    }
    return envNames
}
/**
 * Resolves the machine info of a task, going from the task to its container
 * instance, then to the EC2 instance.
 *
 * @return the machine info, or {@code null} as soon as one of the lookups gives no result
 */
private CloudMachineInfo getInfoByClusterAndTaskArn(String clusterArn, String taskArn) {
    final containerId = getContainerIdByClusterAndTaskArn(clusterArn, taskArn)
    if( !containerId )
        return null

    final instanceId = getInstanceIdByClusterAndContainerId(clusterArn, containerId)
    if( !instanceId )
        return null

    return getInfoByInstanceId(instanceId)
}
/**
 * Finds the ARN of the container instance running the given task in the given cluster.
 *
 * @return the container instance ARN; {@code null} when the task is not found or
 *      the ECS service rejects the request parameters
 * @throws IllegalStateException when more than one entry is returned for the task
 */
private String getContainerIdByClusterAndTaskArn(String clusterArn, String taskArn) {
    final request = DescribeTasksRequest.builder()
            .cluster(clusterArn)
            .tasks(taskArn)
            .build() as DescribeTasksRequest
    try {
        final instanceArns = ecsClient.describeTasks(request).tasks()*.containerInstanceArn()
        final count = instanceArns.size()
        if( count > 1 )
            throw new IllegalStateException("Found more than one container for taskArn=$taskArn")
        if( count == 0 ) {
            log.debug "Unable to find container id for clusterArn=$clusterArn and taskArn=$taskArn"
            return null
        }
        return instanceArns.get(0)
    }
    catch( InvalidParameterException e ) {
        log.debug "Cannot find container id for clusterArn=$clusterArn and taskArn=$taskArn - The task is likely running on another cluster"
        return null
    }
}
/**
 * Finds the ID of the EC2 instance backing the given container instance.
 *
 * @return the EC2 instance ID, or {@code null} when none is returned
 * @throws IllegalStateException when more than one instance is returned
 */
private String getInstanceIdByClusterAndContainerId(String clusterArn, String containerId) {
    final request = DescribeContainerInstancesRequest.builder()
            .cluster(clusterArn)
            .containerInstances(containerId)
            .build() as DescribeContainerInstancesRequest
    final ec2Ids = ecsClient.describeContainerInstances(request).containerInstances()*.ec2InstanceId()

    if( !ec2Ids ) {
        log.debug "Unable to find EC2 instance id for clusterArn=$clusterArn and containerId=$containerId"
        return null
    }
    if( ec2Ids.size() != 1 )
        throw new IllegalStateException("Found more than one EC2 instance for containerId=$containerId")
    return ec2Ids.get(0)
}
/**
 * Describes the given EC2 instance and maps it to a {@link CloudMachineInfo}
 * holding the instance type, the availability zone and the price model.
 * Only the first instance of the first reservation is considered; results are
 * memoized (up to 1000 entries).
 *
 * @param instanceId the EC2 instance ID, required
 * @return the machine info, or {@code null} when the instance is not found
 */
@Memoized(maxCacheSize = 1_000)
private CloudMachineInfo getInfoByInstanceId(String instanceId) {
    assert instanceId

    final request = DescribeInstancesRequest.builder()
            .instanceIds(instanceId)
            .build() as DescribeInstancesRequest
    // indexing an empty list gives null instead of failing
    final reservation = ec2Client.describeInstances(request).reservations() [0]
    Instance instance = null
    if( reservation )
        instance = reservation.instances() [0]

    if( !instance ) {
        log.debug "Unable to find cloud machine info for instanceId=$instanceId"
        return null
    }

    final zone = instance.placement().availabilityZone()
    return new CloudMachineInfo(getInstanceType(instance), zone, getPrice(instance))
}
/**
 * @return the instance type as a string, or {@code null} when no instance is given
 */
protected String getInstanceType(Instance instance) {
    if( !instance )
        return null
    return instance.instanceTypeAsString()
}

/**
 * @return the spot price model when the instance lifecycle is SPOT, the standard one otherwise
 */
private PriceModel getPrice(Instance instance) {
    if( instance.instanceLifecycle() == InstanceLifecycleType.SPOT )
        return PriceModel.spot
    return PriceModel.standard
}
/**
 * Looks for the machine running the given task in each of the clusters backing
 * the given queue, stopping at the first match.
 *
 * @param queue the Batch queue name
 * @param taskArn the ARN of the task
 * @return the machine info, or {@code null} when no cluster gives a result
 */
CloudMachineInfo getCloudInfoByQueueAndTaskArn(String queue, String taskArn) {
    for( String clusterArn : getClusterArnByBatchQueue(queue) ) {
        final info = getInfoByClusterAndTaskArn(clusterArn, taskArn)
        if( info )
            return info
    }

    log.debug "Unable to find cloud info for queue=$queue and taskArn=$taskArn"
    return null
}
/**
 * Retrieves the name of the CloudWatch log stream of the given Batch job.
 *
 * @param jobId the Batch job ID
 * @return the log stream name, or {@code null} when the job is not found or its
 *      description carries no container details
 */
protected String getLogStreamId(String jobId) {
    final request = DescribeJobsRequest.builder()
            .jobs(jobId)
            .build() as DescribeJobsRequest

    final jobs = batchClient.describeJobs(request).jobs()
    if( !jobs ) {
        log.debug "Unable to find info for batch job id=$jobId"
        return null
    }
    // the container details are accessed null-safely, so that a job description
    // without them gives no stream name instead of a NullPointerException
    return jobs[0].container()?.logStreamName()
}
/**
 * Retrieves the CloudWatch log of the specified AWS Batch job.
 *
 * @param jobId
 *      The Batch job ID for which the log is retrieved
 * @param groupName
 *      The name of the log group; `/aws/batch/job` is used when empty
 * @return
 *      The log messages, each one terminated by a newline, or {@code null} when no
 *      log stream is found for the job. Note: a single request is made, therefore only
 *      the events returned by that request are included
 */
String getTaskLogStream(String jobId, String groupName) {
    final streamId = getLogStreamId(jobId)
    if( !streamId ) {
        log.debug "Unable to find CloudWatch log stream for batch job id=$jobId"
        return null
    }

    final request = GetLogEventsRequest.builder()
            .logGroupName(groupName ?: "/aws/batch/job")
            .logStreamName(streamId)
            .build() as GetLogEventsRequest
    final events = logsClient.getLogEvents(request).events()

    final buffer = new StringBuilder()
    for( OutputLogEvent event : events )
        buffer.append(event.message()).append('\n')
    return buffer.toString()
}
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import software.amazon.awssdk.services.batch.BatchClient
import nextflow.util.ClientProxyThrottler
import nextflow.util.ThrottlingExecutor
/**
 * Implements an AWS Batch client proxy that handles all API invocations
 * through the provided executor service
 *
 * WARN: the caller class/method should not be compile static
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsBatchProxy extends ClientProxyThrottler<BatchClient> {
// makes the proxy expose the `BatchClient` methods, the deprecated ones included
@Delegate(deprecated=true)
private BatchClient target
/**
 * @param client the Batch client receiving the invocations
 * @param executor the executor service through which the invocations are submitted
 */
AwsBatchProxy(BatchClient client, ThrottlingExecutor executor) {
super(client, executor, [describeJobs: 10 as Byte]) // note: use higher priority for `describeJobs` invocations
this.target = client
}
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import groovy.transform.CompileStatic
import nextflow.container.ContainerHelper
import nextflow.executor.BashWrapperBuilder
import nextflow.processor.TaskBean
import nextflow.processor.TaskRun
/**
 * Builds the BASH launcher script of a task executed by AWS Batch
 */
@CompileStatic
class AwsBatchScriptLauncher extends BashWrapperBuilder {

    /**
     * @param bean the bean of the task to launch; its input files are extended with
     *      the task control files that must be staged in the container work directory
     * @param opts the AWS executor options
     */
    AwsBatchScriptLauncher(TaskBean bean, AwsOptions opts ) {
        super(bean, new AwsBatchFileCopyStrategy(bean,opts))
        // use a scratch directory unless specified otherwise, so that the output
        // files are copied to the S3 work dir
        if( scratch==null )
            scratch = true

        // the task script is always staged as an input
        stageFromWorkDir(bean, TaskRun.CMD_SCRIPT)
        // the wrapper file is needed when stats are enabled
        // NOTE: this must match the logic that uses the run script in BashWrapperBuilder
        if( isTraceRequired() )
            stageFromWorkDir(bean, TaskRun.CMD_RUN)
        // the stdin file is needed when the task defines an input
        if( bean.input != null )
            stageFromWorkDir(bean, TaskRun.CMD_INFILE)
    }

    /** Adds the named file of the task work directory to the task input files */
    private static void stageFromWorkDir(TaskBean bean, String fileName) {
        bean.inputFiles[fileName] = bean.workDir.resolve(fileName)
    }

    /**
     * @return the ownership fix setting resolved by {@link ContainerHelper} for the container configuration
     */
    @Override
    protected boolean fixOwnership() {
        ContainerHelper.fixOwnership(containerConfig)
    }
}

View File

@@ -0,0 +1,181 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import nextflow.cloud.aws.batch.model.ContainerPropertiesModel
import software.amazon.awssdk.services.batch.model.KeyValuePair
import software.amazon.awssdk.services.batch.model.LinuxParameters
import software.amazon.awssdk.services.batch.model.Tmpfs
import software.amazon.awssdk.services.batch.model.Ulimit
import groovy.transform.CompileStatic
import nextflow.util.CmdLineOptionMap
import nextflow.util.MemoryUnit
/**
 * Maps task container options to AWS container properties
 *
 * @see https://docs.docker.com/engine/reference/commandline/run/
 * @see https://docs.aws.amazon.com/batch/latest/APIReference/API_ContainerProperties.html
 *
 * @author Manuele Simi <manuele.simi@gmail.com>
 */
@CompileStatic
class AwsContainerOptionsMapper {

    /**
     * @deprecated Use {@link #createContainerProperties(CmdLineOptionMap)} instead
     */
    @Deprecated
    static ContainerPropertiesModel createContainerOpts(CmdLineOptionMap options) {
        return createContainerProperties(options)
    }

    /**
     * Creates a container properties model from the given container options.
     *
     * @param options the parsed container options; may be {@code null} or empty
     * @return a new model, left untouched when no options are provided
     * @throws IllegalArgumentException when a {@code --ulimit} or {@code --tmpfs} value is malformed
     */
    static ContainerPropertiesModel createContainerProperties(CmdLineOptionMap options) {
        final containerProperties = new ContainerPropertiesModel()
        if ( options?.hasOptions() ) {
            checkPrivileged(options, containerProperties)
            checkEnvVars(options, containerProperties)
            checkUser(options, containerProperties)
            checkReadOnly(options, containerProperties)
            checkUlimit(options, containerProperties)
            LinuxParameters params = checkLinuxParameters(options)
            if ( params != null )
                containerProperties.linuxParameters(params)
        }
        return containerProperties
    }

    /**
     * Sets the privileged flag when the {@code privileged} option is enabled
     */
    protected static void checkPrivileged(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
        if ( findOptionWithBooleanValue(options, 'privileged') )
            containerProperties.privileged(true)
    }

    /**
     * Maps the {@code --env} and {@code -e} options to environment key-value pairs.
     *
     * Each entry is split on the first {@code =} only, therefore a value
     * that contains {@code =} characters is preserved as is. An entry with
     * no value (e.g. {@code FOO} or {@code FOO=}) yields a {@code null} value.
     */
    protected static void checkEnvVars(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
        // collect into a fresh list: the lists returned by the option map must not be modified
        final values = new ArrayList<String>(findOptionWithMultipleValues(options, 'env'))
        values.addAll(findOptionWithMultipleValues(options, 'e'))

        final keyValuePairs = new ArrayList<KeyValuePair>(values.size())
        for( String it : values ) {
            final int p = it.indexOf('=')
            final String name = p < 0 ? it : it.substring(0, p)
            // an empty value is mapped to null
            final String value = p < 0 ? null : (it.substring(p + 1) ?: null)
            keyValuePairs << KeyValuePair.builder().name(name).value(value).build()
        }
        if ( keyValuePairs )
            containerProperties.environment(keyValuePairs)
    }

    /**
     * Sets the container user from the {@code -u} option, falling back on {@code --user}
     */
    protected static void checkUser(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
        String user = findOptionWithSingleValue(options, 'u')
        if ( !user)
            user = findOptionWithSingleValue(options, 'user')
        if ( user )
            containerProperties.user(user)
    }

    /**
     * Marks the root filesystem as read-only when the {@code read-only} option is enabled
     */
    protected static void checkReadOnly(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
        if ( findOptionWithBooleanValue(options, 'read-only') )
            containerProperties.readonlyRootFilesystem(true)
    }

    /**
     * Maps the {@code --ulimit <name>=<soft>[:<hard>]} options to ulimit settings.
     *
     * When the hard limit is omitted the soft limit is used for both values,
     * which is how Docker interprets a single-value ulimit.
     *
     * @throws IllegalArgumentException when a value does not provide a limit
     *      or the limit is not an integer number
     */
    protected static void checkUlimit(CmdLineOptionMap options, ContainerPropertiesModel containerProperties) {
        final ulimits = new ArrayList<Ulimit>()
        for( String value : findOptionWithMultipleValues(options, 'ulimit') ) {
            final tokens = value.tokenize('=')
            final limits = tokens.size() > 1 ? tokens[1].tokenize(':') : Collections.<String>emptyList()
            // fail with a meaningful error instead of a NullPointerException
            if( !limits )
                throw new IllegalArgumentException("Found a malformed value '${value}' for --ulimit option")
            final Integer soft = limits[0] as Integer
            final Integer hard = limits.size() > 1 ? limits[1] as Integer : soft
            ulimits << Ulimit.builder().name(tokens[0]).softLimit(soft).hardLimit(hard).build()
        }
        if ( ulimits.size() )
            containerProperties.ulimits(ulimits)
    }

    /**
     * Maps the {@code shm-size}, {@code tmpfs}, {@code memory-swap}, {@code init}
     * and {@code memory-swappiness} options to Linux parameters.
     *
     * @return the Linux parameters, or {@code null} when none of the above options is set
     * @throws IllegalArgumentException when a {@code --tmpfs} value does not match
     *      the {@code <path>:<options>,size=<size>} format
     */
    protected static LinuxParameters checkLinuxParameters(CmdLineOptionMap options) {
        final params = LinuxParameters.builder()
        boolean atLeastOneSet = false

        // shared Memory Size
        def value = findOptionWithSingleValue(options, 'shm-size')
        if ( value ) {
            final sharedMemorySize = MemoryUnit.of(value)
            params.sharedMemorySize(sharedMemorySize.mega as Integer)
            atLeastOneSet = true
        }

        // tmpfs mounts, e.g --tmpfs /run:rw,noexec,nosuid,size=64
        final tmpfs = new ArrayList<Tmpfs>()
        findOptionWithMultipleValues(options, 'tmpfs').each { ovalue ->
            def matcher = ovalue =~ /^(?<path>.*):(?<options>.*?),size=(?<sizeMiB>.*)$/
            if (matcher.matches()) {
                tmpfs << Tmpfs.builder().containerPath(matcher.group('path'))
                    .size(matcher.group('sizeMiB') as Integer)
                    .mountOptions(matcher.group('options').tokenize(','))
                    .build()
            } else {
                throw new IllegalArgumentException("Found a malformed value '${ovalue}' for --tmpfs option")
            }
        }
        if ( tmpfs ) {
            params.tmpfs(tmpfs)
            atLeastOneSet = true
        }

        // swap limit equal to memory plus swap
        value = findOptionWithSingleValue(options, 'memory-swap')
        if ( value ) {
            params.maxSwap(value as Integer)
            atLeastOneSet = true
        }

        // run an init inside the container
        if ( findOptionWithBooleanValue(options, 'init') ) {
            params.initProcessEnabled(true)
            atLeastOneSet = true
        }

        // tune container memory swappiness
        value = findOptionWithSingleValue(options, 'memory-swappiness')
        if ( value ) {
            params.swappiness(value as Integer)
            atLeastOneSet = true
        }

        return atLeastOneSet ? params.build() : null
    }

    /**
     * Finds the value of an option
     * @param name the name of the option
     * @return the first value of the option, or {@code null} when it has none
     */
    protected static String findOptionWithSingleValue(CmdLineOptionMap options, String name) {
        options.getFirstValueOrDefault(name,null) as String
    }

    /**
     * Finds the values of an option that can be repeated
     * @param name the name of the option
     * @return the list of values as returned by the option map; callers must not modify it
     */
    protected static List<String> findOptionWithMultipleValues(CmdLineOptionMap options, String name) {
        options.getValues(name)
    }

    /**
     * Checks if a boolean flag exists
     * @param name the name of the flag
     * @return the first value of the flag converted to a boolean when the flag exists, false otherwise
     */
    protected static boolean findOptionWithBooleanValue(CmdLineOptionMap options, String name) {
        options.exists(name) ? options.getFirstValue(name) as Boolean : false
    }
}

View File

@@ -0,0 +1,171 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.Path
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import groovy.transform.CompileStatic
import groovy.transform.EqualsAndHashCode
import groovy.transform.ToString
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.cloud.CloudTransferOptions
import nextflow.cloud.aws.config.AwsConfig
import nextflow.util.Duration
import nextflow.util.TestOnly
/**
* Helper class wrapping AWS config options required for Batch job executions.
*
* Nearly all accessors delegate to the wrapped {@link AwsConfig}, either to
* its batch config or to its S3 config object.
*/
@Slf4j
@ToString(includeNames = true, includePackage = false)
@EqualsAndHashCode
@CompileStatic
class AwsOptions implements CloudTransferOptions {
// retry mode names the `aws.batch.retryMode` setting is checked against
public static final List<String> VALID_RETRY_MODES = ['legacy','standard','adaptive']
// the AWS configuration all the delegating accessors read from
private AwsConfig awsConfig
// set from the executor by the executor-based constructor
String remoteBinDir
// taken from `aws.batch.fetchInstanceType`, falling back on `tower.enabled` (see session constructor)
volatile Boolean fetchInstanceType
/**
* Creates an instance backed by an empty AWS configuration
*/
@TestOnly
protected AwsOptions() {
this.awsConfig=new AwsConfig(Collections.emptyMap())
}
/**
* Creates the options from the executor session and records the executor remote bin directory
*/
AwsOptions( AwsBatchExecutor executor ) {
this(executor.session)
this.remoteBinDir = executor.getRemoteBinDir()
}
/**
* Creates the options from the `aws` scope of the session config.
* An empty configuration is used when that scope is missing.
*/
@Deprecated
AwsOptions(Session session) {
awsConfig = new AwsConfig(session.config.aws as Map ?: Collections.emptyMap())
fetchInstanceType = session.config.navigate('aws.batch.fetchInstanceType')
// when not explicitly set, fall back on the `tower.enabled` setting (false when missing)
if( fetchInstanceType==null )
fetchInstanceType = session.config.navigate('tower.enabled',false)
}
String getRegion() {
return awsConfig.getRegion()
}
String getJobRole() {
return awsConfig.batchConfig.getJobRole()
}
String getLogsGroup() {
return awsConfig.batchConfig.getLogsGroup()
}
String getRetryMode() {
return awsConfig.batchConfig.getRetryMode()
}
String getShareIdentifier() {
return awsConfig.batchConfig.getShareIdentifier()
}
Integer getSchedulingPriority() {
return awsConfig.batchConfig.getSchedulingPriority()
}
String getCliPath() {
return awsConfig.batchConfig.getCliPath()
}
/**
* @return a read-only view of the configured volumes, or an empty list when none is defined
*/
List<String> getVolumes() {
final result = awsConfig.batchConfig.getVolumes()
return result != null ? Collections.unmodifiableList(result) : Collections.<String>emptyList()
}
int getMaxParallelTransfers() {
return awsConfig.batchConfig.getMaxParallelTransfers()
}
int getMaxTransferAttempts() {
return awsConfig.batchConfig.getMaxTransferAttempts()
}
Integer getMaxSpotAttempts() {
return awsConfig.batchConfig.getMaxSpotAttempts()
}
Duration getDelayBetweenAttempts() {
return awsConfig.batchConfig.getDelayBetweenAttempts()
}
String getStorageClass() {
return awsConfig.s3Config.getStorageClass()
}
String getStorageEncryption() {
return awsConfig.s3Config.getStorageEncryption()
}
String getStorageKmsKeyId() {
return awsConfig.s3Config.getStorageKmsKeyId()
}
ObjectCannedACL getS3Acl() {
return awsConfig.s3Config.getS3Acl()
}
Boolean getDebug() {
return awsConfig.s3Config.getDebug()
}
Boolean getRequesterPays() {
return awsConfig.s3Config.getRequesterPays()
}
/**
* @return the AWS CLI command: the configured CLI path, or `aws` when none is set,
* followed by the `--region` option when a region is defined
*/
String getAwsCli() {
def result = getCliPath()
if( !result ) result = 'aws'
if( region ) result += " --region $region"
return result
}
/**
* Adds a volume to the underlying batch config
* @return this object, to allow call chaining
*/
AwsOptions addVolume(Path path) {
awsConfig.batchConfig.addVolume(path)
return this
}
boolean isFargateMode() {
return awsConfig.batchConfig.fargateMode
}
String getS5cmdPath() {
return awsConfig.batchConfig.s5cmdPath
}
String getExecutionRole() {
return awsConfig.batchConfig.getExecutionRole()
}
boolean terminateUnschedulableJobs() {
return awsConfig.batchConfig.terminateUnschedulableJobs
}
Boolean getForceGlacierTransfer() {
return awsConfig.batchConfig.forceGlacierTransfer
}
}

View File

@@ -0,0 +1,287 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch.model
import groovy.transform.CompileStatic
import software.amazon.awssdk.services.batch.model.ContainerProperties
import software.amazon.awssdk.services.batch.model.EphemeralStorage
import software.amazon.awssdk.services.batch.model.KeyValuePair
import software.amazon.awssdk.services.batch.model.LinuxParameters
import software.amazon.awssdk.services.batch.model.LogConfiguration
import software.amazon.awssdk.services.batch.model.MountPoint
import software.amazon.awssdk.services.batch.model.NetworkConfiguration
import software.amazon.awssdk.services.batch.model.ResourceRequirement
import software.amazon.awssdk.services.batch.model.RuntimePlatform
import software.amazon.awssdk.services.batch.model.Secret
import software.amazon.awssdk.services.batch.model.Ulimit
import software.amazon.awssdk.services.batch.model.Volume
/**
 * Models the container properties used to configure an AWS Batch job.
 *
 * This is a mutable version of {@link ContainerProperties} required
 * to simplify the extension of container settings in the AWS Batch executor
 * and its sub-classes (e.g. nf-xpack).
 *
 * Each setter stores the given value and returns this object to allow
 * call chaining. Use {@link #toBatchContainerProperties()} to obtain the
 * corresponding AWS SDK object.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
class ContainerPropertiesModel {

    private String image
    private List<String> command
    private List<ResourceRequirement> resourceRequirements
    private String jobRoleArn
    private String executionRoleArn
    private LinuxParameters linuxParameters
    private ArrayList<KeyValuePair> environment
    private boolean privileged
    private String user
    private boolean readonlyRootFilesystem
    private ArrayList<Ulimit> ulimits
    private LogConfiguration logConfiguration
    private List<MountPoint> mountPoints
    private List<Volume> volumes
    private NetworkConfiguration networkConfiguration
    private EphemeralStorage ephemeralStorage
    private RuntimePlatform runtimePlatform
    private List<Secret> secrets

    ContainerPropertiesModel image(String value) {
        this.image = value
        return this
    }

    ContainerPropertiesModel command(String... value) {
        this.command = value as List<String>
        return this
    }

    ContainerPropertiesModel resourceRequirements(ResourceRequirement... value) {
        this.resourceRequirements = value as List<ResourceRequirement>
        return this
    }

    ContainerPropertiesModel jobRoleArn(String value) {
        this.jobRoleArn = value
        return this
    }

    ContainerPropertiesModel executionRoleArn(String value) {
        this.executionRoleArn = value
        return this
    }

    ContainerPropertiesModel user(String user) {
        this.user = user
        return this
    }

    ContainerPropertiesModel readonlyRootFilesystem(boolean value) {
        this.readonlyRootFilesystem = value
        return this
    }

    ContainerPropertiesModel environment(ArrayList<KeyValuePair> value) {
        this.environment = value
        return this
    }

    ContainerPropertiesModel linuxParameters(LinuxParameters value) {
        this.linuxParameters = value
        return this
    }

    ContainerPropertiesModel privileged(boolean value) {
        this.privileged = value
        return this
    }

    ContainerPropertiesModel ulimits(ArrayList<Ulimit> value) {
        this.ulimits = value
        return this
    }

    ContainerPropertiesModel logConfiguration(LogConfiguration value) {
        this.logConfiguration = value
        return this
    }

    ContainerPropertiesModel mountPoints(List<MountPoint> value) {
        this.mountPoints = value
        return this
    }

    ContainerPropertiesModel volumes(List<Volume> value) {
        this.volumes = value
        return this
    }

    ContainerPropertiesModel networkConfiguration(NetworkConfiguration value) {
        this.networkConfiguration = value
        return this
    }

    ContainerPropertiesModel ephemeralStorage(EphemeralStorage value) {
        this.ephemeralStorage = value
        return this
    }

    ContainerPropertiesModel runtimePlatform(RuntimePlatform value) {
        this.runtimePlatform = value
        return this
    }

    ContainerPropertiesModel secrets(List<Secret> value) {
        this.secrets = value
        return this
    }

    LinuxParameters getLinuxParameters() {
        return linuxParameters
    }

    ArrayList<KeyValuePair> getEnvironment() {
        return environment
    }

    boolean getPrivileged() {
        return privileged
    }

    String getUser() {
        return user
    }

    boolean getReadonlyRootFilesystem() {
        return readonlyRootFilesystem
    }

    ArrayList<Ulimit> getUlimits() {
        return ulimits
    }

    String getImage() {
        return image
    }

    List<String> getCommand() {
        return command
    }

    List<ResourceRequirement> getResourceRequirements() {
        return resourceRequirements
    }

    String getJobRoleArn() {
        return jobRoleArn
    }

    String getExecutionRoleArn() {
        return executionRoleArn
    }

    LogConfiguration getLogConfiguration() {
        return logConfiguration
    }

    List<MountPoint> getMountPoints() {
        return mountPoints
    }

    List<Volume> getVolumes() {
        return volumes
    }

    NetworkConfiguration getNetworkConfiguration() {
        return networkConfiguration
    }

    EphemeralStorage getEphemeralStorage() {
        return ephemeralStorage
    }

    RuntimePlatform getRuntimePlatform() {
        return runtimePlatform
    }

    /**
     * @return the secrets assigned to this model, or {@code null} when none was set
     */
    List<Secret> getSecrets() {
        return secrets
    }

    /**
     * Converts this model to the immutable AWS SDK object.
     *
     * Only the attributes that evaluate to true are copied, therefore
     * {@code null} values, empty collections or strings, and boolean flags
     * set to {@code false} are left unset in the resulting object.
     *
     * @return a new {@link ContainerProperties} instance
     */
    ContainerProperties toBatchContainerProperties() {
        def builder = ContainerProperties.builder()

        if (image) builder.image(image)
        if (command) builder.command(command)
        if (resourceRequirements) builder.resourceRequirements(resourceRequirements)
        if (jobRoleArn) builder.jobRoleArn(jobRoleArn)
        if (executionRoleArn) builder.executionRoleArn(executionRoleArn)
        if (linuxParameters) builder.linuxParameters(linuxParameters)
        if (environment) builder.environment(environment)
        if (privileged) builder.privileged(privileged)
        if (user) builder.user(user)
        if (readonlyRootFilesystem) builder.readonlyRootFilesystem(readonlyRootFilesystem)
        if (ulimits) builder.ulimits(ulimits)
        if (logConfiguration) builder.logConfiguration(logConfiguration)
        if (mountPoints) builder.mountPoints(mountPoints)
        if (volumes) builder.volumes(volumes)
        if (networkConfiguration) builder.networkConfiguration(networkConfiguration)
        if (ephemeralStorage) builder.ephemeralStorage(ephemeralStorage)
        if (runtimePlatform) builder.runtimePlatform(runtimePlatform)
        if (secrets) builder.secrets(secrets)

        return builder.build()
    }

    @Override
    public String toString() {
        return "ContainerPropertiesModel{" +
            "image='" + image + '\'' +
            ", command=" + command +
            ", resourceRequirements=" + resourceRequirements +
            ", jobRoleArn='" + jobRoleArn + '\'' +
            ", executionRoleArn='" + executionRoleArn + '\'' +
            ", linuxParameters=" + linuxParameters +
            ", environment=" + environment +
            ", privileged=" + privileged +
            ", user='" + user + '\'' +
            ", readonlyRootFilesystem=" + readonlyRootFilesystem +
            ", ulimits=" + ulimits +
            ", logConfiguration=" + logConfiguration +
            ", mountPoints=" + mountPoints +
            ", volumes=" + volumes +
            ", networkConfiguration=" + networkConfiguration +
            ", ephemeralStorage=" + ephemeralStorage +
            ", runtimePlatform=" + runtimePlatform +
            ", secrets=" + secrets +
            '}';
    }
}

View File

@@ -0,0 +1,154 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch.model
import groovy.transform.CompileStatic
import software.amazon.awssdk.services.batch.model.ConsumableResourceProperties
import software.amazon.awssdk.services.batch.model.JobDefinitionType
import software.amazon.awssdk.services.batch.model.PlatformCapability
import software.amazon.awssdk.services.batch.model.RegisterJobDefinitionRequest
/**
* Custom mutable RegisterJobDefinitionRequest class that allows subclasses to modify the request
* before converting it to the immutable AWS SDK object.
*
* This is a mutable version of {@link RegisterJobDefinitionRequest} required
* to simplify the extension of container settings in the AWS Batch executor
* and its sub-classes (e.g. nf-xpack).
*
* Each setter stores the given value and returns this object to allow call chaining.
*/
@CompileStatic
class RegisterJobDefinitionModel {
private String jobDefinitionName
private JobDefinitionType type
private List<PlatformCapability> platformCapabilities
private ContainerPropertiesModel containerProperties
private Map<String,String> parameters
private Map<String,String> tags
private ConsumableResourceProperties consumableResourceProperties
RegisterJobDefinitionModel jobDefinitionName(String value) {
this.jobDefinitionName = value
return this
}
RegisterJobDefinitionModel type(JobDefinitionType value) {
this.type = value
return this
}
RegisterJobDefinitionModel platformCapabilities(List<PlatformCapability> value) {
this.platformCapabilities = value
return this
}
RegisterJobDefinitionModel containerProperties(ContainerPropertiesModel value) {
this.containerProperties = value
return this
}
RegisterJobDefinitionModel parameters(Map<String,String> value) {
this.parameters = value
return this
}
// NOTE: the given map is stored as is (no copy), therefore `addTagsEntry` writes into it
RegisterJobDefinitionModel tags(Map<String,String> value) {
this.tags = value
return this
}
/**
* Adds a single tag, creating the tags map when it has not been set yet
*
* @param key the tag name
* @param value the tag value
* @return this object, to allow call chaining
*/
RegisterJobDefinitionModel addTagsEntry(String key, String value) {
if( this.tags==null )
this.tags = new LinkedHashMap<>()
this.tags.put(key, value)
return this
}
RegisterJobDefinitionModel consumableResourceProperties(ConsumableResourceProperties value) {
this.consumableResourceProperties = value
return this
}
String getJobDefinitionName() {
return jobDefinitionName
}
JobDefinitionType getType() {
return type
}
List<PlatformCapability> getPlatformCapabilities() {
return platformCapabilities
}
ContainerPropertiesModel getContainerProperties() {
return containerProperties
}
Map<String, String> getParameters() {
return parameters
}
Map<String, String> getTags() {
return tags
}
ConsumableResourceProperties getConsumableResourceProperties() {
return consumableResourceProperties
}
/**
* Converts this model to the immutable AWS SDK request object.
*
* Only the attributes that evaluate to true are copied, therefore null
* values and empty strings, lists or maps are left unset in the request.
* The container properties model is converted with its own
* `toBatchContainerProperties` method.
*
* @return a new {@link RegisterJobDefinitionRequest} instance
*/
RegisterJobDefinitionRequest toBatchRequest() {
final builder = RegisterJobDefinitionRequest.builder()
if (jobDefinitionName)
builder.jobDefinitionName(jobDefinitionName)
if (type)
builder.type(type)
if (platformCapabilities)
builder.platformCapabilities(platformCapabilities)
if (containerProperties)
builder.containerProperties(containerProperties.toBatchContainerProperties())
if (consumableResourceProperties)
builder.consumableResourceProperties(consumableResourceProperties)
if (parameters)
builder.parameters(parameters)
if (tags)
builder.tags(tags)
return (RegisterJobDefinitionRequest) builder.build()
}
@Override
String toString() {
return "RegisterJobDefinitionModel{" +
"jobDefinitionName='" + jobDefinitionName + '\'' +
", type=" + type +
", platformCapabilities=" + platformCapabilities +
", containerProperties=" + containerProperties +
", parameters=" + parameters +
", tags=" + tags +
", consumableResourceProperties=" + consumableResourceProperties +
'}';
}
}

View File

@@ -0,0 +1,215 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import java.nio.file.Path
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.SysEnv
import nextflow.cloud.CloudTransferOptions
import nextflow.cloud.aws.batch.AwsOptions
import nextflow.config.spec.ConfigOption
import nextflow.config.spec.ConfigScope
import nextflow.script.dsl.Description
import nextflow.exception.ProcessUnrecoverableException
import nextflow.util.Duration
/**
* Model AWS Batch config settings.
*
* The values are read from the options map given to the constructor,
* applying a default when an option is missing.
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@CompileStatic
class AwsBatchConfig implements CloudTransferOptions, ConfigScope {
// number of transfer attempts used when neither the config nor the AWS_MAX_ATTEMPTS variable provides one
public static final int DEFAULT_AWS_MAX_ATTEMPTS = 5
@ConfigOption
@Description("""
The path where the AWS command line tool is installed in the host AMI.
""")
final String cliPath
@ConfigOption
@Description("""
Delay between download attempts from S3 (default: `10 sec`).
""")
final Duration delayBetweenAttempts
@ConfigOption
@Description("""
The AWS Batch [Execution Role](https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html) ARN that needs to be used to execute the Batch Job. It is mandatory when using AWS Fargate.
""")
final String executionRole
@ConfigOption
@Description("""
When `true`, add the `--force-glacier-transfer` flag to AWS CLI S3 download commands (default: `false`).
""")
final boolean forceGlacierTransfer
@ConfigOption
@Description("""
The AWS Batch Job Role ARN that needs to be used to execute the Batch Job.
""")
final String jobRole
@ConfigOption
@Description("""
The name of the logs group used by Batch Jobs (default: `/aws/batch/job`).
""")
final String logsGroup
@ConfigOption
@Description("""
Max parallel upload/download transfer operations *per job* (default: `4`).
""")
final int maxParallelTransfers
@ConfigOption
@Description("""
Max number of execution attempts of a job interrupted by a EC2 Spot reclaim event (default: `0`)
""")
final Integer maxSpotAttempts
// NOTE(review): the description below reports `1` as default, while the constructor falls back on
// `defaultMaxTransferAttempts()` i.e. the AWS_MAX_ATTEMPTS variable or DEFAULT_AWS_MAX_ATTEMPTS -- confirm which one is intended
@ConfigOption
@Description("""
Max number of downloads attempts from S3 (default: `1`).
""")
final int maxTransferAttempts
// NOTE(review): this field is not assigned by the constructor below, which only
// reads `opts.platformType` to set `fargateMode` -- confirm this is intended
@ConfigOption
@Description("""
The compute platform type used by AWS Batch. Can be either `ec2` or `fargate`. Set to `fargate` to use [AWS Fargate](https://docs.aws.amazon.com/batch/latest/userguide/fargate.html).
""")
final String platformType
@ConfigOption
@Description("""
The [retry mode](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-retries.html) used to handle rate-limiting by AWS APIs. Can be one of `standard`, `legacy`, `adaptive`, or `built-in` (default: `standard`).
""")
final String retryMode
@ConfigOption
@Description("""
The scheduling priority for all tasks when using [fair-share scheduling](https://aws.amazon.com/blogs/hpc/introducing-fair-share-scheduling-for-aws-batch/) (default: `0`).
""")
final Integer schedulingPriority
@ConfigOption
@Description("""
The share identifier for all tasks when using [fair-share scheduling](https://aws.amazon.com/blogs/hpc/introducing-fair-share-scheduling-for-aws-batch/).
""")
final String shareIdentifier
@ConfigOption
@Description("""
When `true`, jobs that cannot be scheduled due to lack of resources or misconfiguration are terminated and handled as task failures (default: `false`).
""")
final boolean terminateUnschedulableJobs
@ConfigOption
@Description("""
List of container mounts. Mounts can be specified as simple e.g. `/some/path` or canonical format e.g. `/host/path:/mount/path[:ro|rw]`.
""")
final List<String> volumes
/**
* The path for the `s5cmd` tool as an alternative to `aws s3` CLI to upload/download files
*/
String s5cmdPath
/**
* Whether it should use the Fargate API
*/
boolean fargateMode
/**
* Creates the config from the given options map.
*
* When `platformType` is `fargate` the `cliPath` option is interpreted as the
* `s5cmd` tool path and the `cliPath` field is left unset, otherwise it is
* validated as the AWS CLI path and the `s5cmdPath` field is left unset.
*
* @param opts the options map, presumably the `aws.batch` config scope
*/
AwsBatchConfig(Map opts) {
fargateMode = opts.platformType == 'fargate'
cliPath = !fargateMode ? parseCliPath(opts.cliPath as String) : null
s5cmdPath = fargateMode ? parses5cmdPath(opts.cliPath as String) : null
maxParallelTransfers = opts.maxParallelTransfers as Integer ?: MAX_TRANSFER
maxTransferAttempts = opts.maxTransferAttempts as Integer ?: defaultMaxTransferAttempts()
delayBetweenAttempts = opts.delayBetweenAttempts as Duration ?: DEFAULT_DELAY_BETWEEN_ATTEMPTS
maxSpotAttempts = opts.maxSpotAttempts!=null ? opts.maxSpotAttempts as Integer : null
volumes = makeVols(opts.volumes)
jobRole = opts.jobRole
logsGroup = opts.logsGroup
retryMode = opts.retryMode ?: 'standard'
shareIdentifier = opts.shareIdentifier
schedulingPriority = opts.schedulingPriority as Integer ?: 0
executionRole = opts.executionRole
terminateUnschedulableJobs = opts.terminateUnschedulableJobs as boolean
forceGlacierTransfer = opts.forceGlacierTransfer as boolean
if( retryMode == 'built-in' )
retryMode = null // this force falling back on NF built-in retry mode instead of delegating to AWS CLI tool
// an unknown retry mode is only reported with a warning, the value is kept as is
if( retryMode && retryMode !in AwsOptions.VALID_RETRY_MODES )
log.warn "Unexpected value for 'aws.batch.retryMode' config setting - offending value: $retryMode - valid values: ${AwsOptions.VALID_RETRY_MODES.join(',')}"
}
/**
* @return the value of the `AWS_MAX_ATTEMPTS` environment variable when defined,
* otherwise {@link #DEFAULT_AWS_MAX_ATTEMPTS}
*/
protected int defaultMaxTransferAttempts() {
final env = SysEnv.get()
return env.AWS_MAX_ATTEMPTS ? env.AWS_MAX_ATTEMPTS as int : DEFAULT_AWS_MAX_ATTEMPTS
}
/**
* Validates the AWS CLI path.
*
* @param value the configured CLI path
* @return the given path, or null when it is empty or it has an `s5cmd` component
* @throws ProcessUnrecoverableException when the path is not absolute or it does not end with `/bin/aws`
*/
private String parseCliPath(String value) {
if( !value )
return null
// a path having an `s5cmd` component (split on `/` and blank) is not an AWS CLI path
if( value.tokenize('/ ').contains('s5cmd') )
return null
if( !value.startsWith('/') )
throw new ProcessUnrecoverableException("Not a valid aws-cli tools path: $value -- it must be an absolute path")
if( !value.endsWith('/bin/aws'))
throw new ProcessUnrecoverableException("Not a valid aws-cli tools path: $value -- it must end with the `/bin/aws` suffix")
return value
}
/**
* Creates the list of volumes from the `volumes` option.
*
* @param obj either a list of paths or a string of comma separated paths
* @return the normalised paths, or a new empty list when no value is given
* @throws IllegalArgumentException when the value is neither a list nor a string
*/
protected List<String> makeVols(obj) {
if( !obj )
return new ArrayList<String>(10)
if( obj instanceof List )
return ((List)obj).collect { normPath0(it.toString()) }
if( obj instanceof CharSequence )
return obj.toString().tokenize(',').collect { normPath0(it) }
throw new IllegalArgumentException("Not a valid `aws.batch.volumes` value: $obj [${obj.getClass().getName()}]")
}
/**
* Trims the given path and removes any trailing slash, unless the path is a single character
*/
protected String normPath0(String it) {
def result = it.trim()
while( result.endsWith('/') && result.size()>1 )
result = result.substring(0,result.size()-1)
return result
}
/**
* Adds the given path to the list of volumes, unless it is already included.
*
* @param path a path whose scheme is expected to be `file` (checked with an assertion)
* @return this object, to allow call chaining
*/
AwsBatchConfig addVolume(Path path) {
assert path.scheme == 'file'
def location = path.toString()
if( !volumes.contains(location) )
volumes.add(location)
return this
}
/**
* Resolves the `s5cmd` tool path.
*
* @param value the configured CLI path
* @return the given path when it has an `s5cmd` component, otherwise the plain `s5cmd` command name
*/
protected String parses5cmdPath(String value) {
if( !value )
return 's5cmd'
if( value.tokenize('/ ').contains('s5cmd') )
return value
return 's5cmd'
}
}

View File

@@ -0,0 +1,221 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import java.nio.file.Path
import java.nio.file.Paths
import software.amazon.awssdk.regions.Region
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Global
import nextflow.SysEnv
import nextflow.config.spec.ConfigOption
import nextflow.config.spec.ConfigScope
import nextflow.config.spec.ScopeName
import nextflow.script.dsl.Description
import nextflow.util.IniFile
/**
* Model AWS cloud configuration settings
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@ScopeName("aws")
@Description("""
The `aws` scope controls the interactions with AWS, including AWS Batch and S3.
""")
@Slf4j
@CompileStatic
class AwsConfig implements ConfigScope {
final AwsBatchConfig batch
final AwsS3Config client
@ConfigOption
@Description("""
AWS region (e.g. `us-east-1`).
""")
final String region
@ConfigOption
@Description("""
AWS account access key.
""")
final String accessKey
@ConfigOption
@Description("""
AWS account secret key.
""")
final String secretKey
@ConfigOption
@Description("""
AWS profile from `~/.aws/credentials`.
""")
final String profile
/* required by extension point -- do not remove */
AwsConfig() {}
AwsConfig(Map opts) {
this.accessKey = opts.accessKey
this.secretKey = opts.secretKey
this.profile = getAwsProfile0(SysEnv.get(), opts)
this.region = getAwsRegion(SysEnv.get(), opts)
this.batch = new AwsBatchConfig((Map)opts.batch ?: Collections.emptyMap())
this.client = new AwsS3Config((Map)opts.client ?: Collections.emptyMap())
}
List<String> getCredentials() {
return accessKey && secretKey
? List.of(accessKey, secretKey)
: Collections.<String>emptyList()
}
AwsS3Config getS3Config() { client }
AwsBatchConfig getBatchConfig() { batch }
@Deprecated
String getS3GlobalRegion() {
return !region || !s3Config.endpoint || s3Config.endpoint.contains(".amazonaws.com")
? Region.US_EAST_1.id() // always use US_EAST_1 as global region for AWS endpoints
: region // for custom endpoint use the config provided region
}
/**
* Resolves the region used for S3 evaluating the region resolved from config and a possible region defined in the endpoint.
* Fallback to the global region US_EAST_1 when no region is found.
*
* Preference:
* 1. endpoint region
* 2. config region
* 3. US_EAST_1
*
* @returns Resolved region.
**/
String resolveS3Region() {
final epRegion = client.getEndpointRegion()
return epRegion ?: this.region ?: Region.US_EAST_1.id()
}
static protected String getAwsProfile0(Map env, Map<String,Object> config) {
final profile = config?.profile as String
if( profile )
return profile
if( env?.containsKey('AWS_PROFILE'))
return env.get('AWS_PROFILE')
if( env?.containsKey('AWS_DEFAULT_PROFILE'))
return env.get('AWS_DEFAULT_PROFILE')
return null
}
static protected String getAwsRegion(Map env, Map config) {
def home = Paths.get(System.properties.get('user.home') as String)
def file = home.resolve('.aws/config')
return getAwsRegion0(env, config, file)
}
static protected String getAwsRegion0(Map env, Map config, Path awsFile) {
// check nxf config file
if( config instanceof Map ) {
def region = config.region
if( region )
return region.toString()
}
if( env && env.AWS_DEFAULT_REGION ) {
return env.AWS_DEFAULT_REGION.toString()
}
if( !awsFile.exists() ) {
return null
}
final profile = getAwsProfile0(env, config) ?: 'default'
final ini = new IniFile(awsFile)
return ini.section(profile).region
}
Map getS3LegacyProperties() {
final result = new LinkedHashMap(20)
// -- remaining client config options
def config = client.getAwsClientConfig()
config = checkDefaultErrorRetry(config, SysEnv.get())
if( config ) {
result.putAll(config)
}
log.debug "AWS S3 config properties: ${dumpAwsConfig(result)}"
return result
}
/**
 * Make sure the {@code max_error_retry} option is defined as a string.
 *
 * @param result Options map to complete; it is modified in place. A new map is created when {@code null}
 * @param env Environment variables map, may be {@code null}; {@code AWS_MAX_ATTEMPTS} is used
 *      when the option is not set
 * @return The options map with {@code max_error_retry} set to the existing value, the
 *      environment value or {@code '5'}, always converted to string
 */
static protected Map checkDefaultErrorRetry(Map result, Map env) {
    final opts = result != null ? result : new HashMap(10)
    def retries = opts.max_error_retry
    if( retries == null )
        retries = env?.AWS_MAX_ATTEMPTS
    // the client expects a string value; fallback to the default when nothing is provided
    opts.max_error_retry = retries != null ? retries.toString() : '5'
    return opts
}
/**
 * Render the given options as a string suitable for logging.
 *
 * The {@code access_key}, {@code secret_key} and {@code session_token} values longer
 * than 6 characters are cut to their first 6 characters followed by {@code ..}.
 * Shorter values are left untouched. The given map is not modified.
 *
 * @param config The options to render
 * @return The string representation of the masked copy
 */
static private String dumpAwsConfig( Map<String,String> config ) {
    def masked = new HashMap(config)
    for( String key : ['access_key', 'secret_key', 'session_token'] ) {
        final value = config.get(key)
        if( value && value.size()>6 )
            masked.put(key, "${value.substring(0,6)}..")
    }
    return masked.toString()
}
/**
 * Create the AWS config from the given session config map.
 *
 * @param config The session config map; a warning is logged when it is {@code null}
 * @return A config built from the {@code aws} entry, or from an empty map when
 *      the entry (or the whole config) is missing
 */
static private AwsConfig getConfig0(Map config) {
    if( config != null ) {
        final awsOpts = (Map)config.aws
        return new AwsConfig( awsOpts ?: Collections.emptyMap() )
    }
    log.warn("Missing nextflow session config object")
    return new AwsConfig(Collections.emptyMap())
}
/**
 * @return The AWS config created from {@code Global.config}
 */
static AwsConfig config() {
    return getConfig0(Global.config)
}
}

View File

@@ -0,0 +1,365 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import static nextflow.cloud.aws.util.AwsHelper.*
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.SysEnv
import nextflow.config.spec.ConfigOption
import nextflow.config.spec.ConfigScope
import nextflow.script.dsl.Description
import nextflow.file.FileHelper
import nextflow.util.Duration
import nextflow.util.MemoryUnit
/**
* Model AWS S3 config settings
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Slf4j
@CompileStatic
class AwsS3Config implements ConfigScope {
@ConfigOption
@Description("""
Allow the access of public S3 buckets without providing AWS credentials (default: `false`). Any service that does not accept unsigned requests will return a service access error.
""")
final Boolean anonymous
@ConfigOption
@Description("""
The amount of time to wait (in milliseconds) when initially establishing a connection before timing out (default: `10000`).
""")
final Integer connectionTimeout
final Boolean debug
@ConfigOption
@Description("""
The AWS S3 API entry point e.g. `https://s3-us-west-1.amazonaws.com`. The endpoint must include the protocol prefix e.g. `https://`.
""")
final String endpoint
/**
* Maximum number of concurrent transfers used by S3 transfer manager. By default,
* it is determined automatically by `targetThroughputInGbps`.
*/
@ConfigOption
final Integer maxConcurrency
@ConfigOption
@Description("""
The maximum number of open HTTP connections used by the S3 client (default: `50`).
""")
final Integer maxConnections
@ConfigOption
@Description("""
The maximum size for the heap memory buffer used by concurrent downloads. It must be at least 10 times the `minimumPartSize` (default:`400 MB`).
""")
final MemoryUnit maxDownloadHeapMemory
@ConfigOption
@Description("""
The maximum number of retry attempts for failed retryable requests (default: `-1`).
""")
final Integer maxErrorRetry
/**
* Maximum native memory used by S3 transfer manager. By default, it is
* determined automatically by `targetThroughputInGbps`.
*/
@ConfigOption
final MemoryUnit maxNativeMemory
@ConfigOption
@Description("""
The minimum part size used for multipart uploads to S3 (default: `8 MB`).
""")
final MemoryUnit minimumPartSize
// Fixed the option name in the description: `aws.cllient` -> `aws.client`
@ConfigOption
@Description("""
The object size threshold used for multipart uploads to S3 (default: same as `aws.client.minimumPartSize`).
""")
final MemoryUnit multipartThreshold
@ConfigOption
@Description("""
The proxy host to connect through.
""")
final String proxyHost
@ConfigOption
@Description("""
The port to use when connecting through a proxy.
""")
final Integer proxyPort
@ConfigOption
@Description("""
The protocol scheme to use when connecting through a proxy. Can be `http` or `https` (default: `'http'`).
""")
final String proxyScheme
@ConfigOption
@Description("""
The user name to use when connecting through a proxy.
""")
final String proxyUsername
@ConfigOption
@Description("""
The password to use when connecting through a proxy.
""")
final String proxyPassword
@ConfigOption
@Description("""
Use [Requester Pays](https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html) for S3 buckets (default: `false`).
""")
final Boolean requesterPays
@ConfigOption(types=[String])
@Description("""
Specify predefined bucket permissions, also known as [canned ACL](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl). Can be one of `Private`, `PublicRead`, `PublicReadWrite`, `AuthenticatedRead`, `LogDeliveryWrite`, `BucketOwnerRead`, `BucketOwnerFullControl`, or `AwsExecRead`.
""")
final ObjectCannedACL s3Acl
@ConfigOption
@Description("""
Use the path-based access model to access objects in S3-compatible storage systems (default: `false`).
""")
final Boolean s3PathStyleAccess
@ConfigOption
@Description("""
The amount of time to wait (in milliseconds) for data to be transferred over an established, open connection before the connection is timed out (default: `30000`).
""")
final Integer socketTimeout
@ConfigOption
@Description("""
The S3 storage class applied to stored objects, one of \\[`STANDARD`, `STANDARD_IA`, `ONEZONE_IA`, `INTELLIGENT_TIERING`\\] (default: `STANDARD`).
""")
final String storageClass
@ConfigOption
@Description("""
The S3 server side encryption to be used when saving objects on S3. Can be `AES256` or `aws:kms` (default: none).
""")
final String storageEncryption
@ConfigOption
@Description("""
The AWS KMS key Id to be used to encrypt files stored in the target S3 bucket.
""")
final String storageKmsKeyId
@ConfigOption
@Description("""
The target network throughput (in Gbps) used for S3 uploads and downloads (default: `10`).
""")
final Double targetThroughputInGbps
// deprecated
@Deprecated
@ConfigOption
@Description("""
The size of a single part in a multipart upload (default: `100 MB`).
""")
final MemoryUnit uploadChunkSize
@Deprecated
@ConfigOption
@Description("""
The maximum number of upload attempts after which a multipart upload returns an error (default: `5`).
""")
final Integer uploadMaxAttempts
@Deprecated
@ConfigOption
@Description("""
The maximum number of threads used for multipart upload (default: `10`).
""")
final Integer uploadMaxThreads
@Deprecated
@ConfigOption
@Description("""
The time to wait after a failed upload attempt to retry the part upload (default: `500ms`).
""")
final Duration uploadRetrySleep
@Deprecated
@ConfigOption
@Description("""
The S3 storage class applied to stored objects. Can be `STANDARD`, `STANDARD_IA`, `ONEZONE_IA`, or `INTELLIGENT_TIERING` (default: `STANDARD`).
""")
final String uploadStorageClass
private static final long _1MB = 1024 * 1024;
// According to CRT Async client docs https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3CrtAsyncClientBuilder.html
public static final long DEFAULT_PART_SIZE = 8 * _1MB;
public static final int DEFAULT_INIT_BUFFER_PARTS = 10;
// Maximum heap buffer size
public static final long DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE = 400 * _1MB;
/**
 * Create the S3 client settings from the given options map.
 *
 * Each option is read from the entry having the same name and converted to the
 * type of the target field.
 *
 * @param opts The options map; it is dereferenced without a null check
 * @throws IllegalArgumentException when the endpoint protocol is neither {@code http} nor
 *      {@code https}, or when {@link #checkDownloadBufferParams} rejects the buffer settings
 */
AwsS3Config(Map opts) {
this.anonymous = opts.anonymous as Boolean
this.connectionTimeout = opts.connectionTimeout as Integer
this.debug = opts.debug as Boolean
// the endpoint falls back on the `AWS_S3_ENDPOINT` environment variable
this.endpoint = opts.endpoint ?: SysEnv.get('AWS_S3_ENDPOINT')
if( endpoint && FileHelper.getUrlProtocol(endpoint) !in ['http','https'] )
throw new IllegalArgumentException("S3 endpoint must begin with http:// or https:// prefix - offending value: '${endpoint}'")
this.maxConcurrency = opts.maxConcurrency as Integer
this.maxConnections = opts.maxConnections as Integer
this.maxDownloadHeapMemory = opts.maxDownloadHeapMemory as MemoryUnit
this.maxErrorRetry = opts.maxErrorRetry as Integer
this.maxNativeMemory = opts.maxNativeMemory as MemoryUnit
this.minimumPartSize = opts.minimumPartSize as MemoryUnit
this.multipartThreshold = opts.multipartThreshold as MemoryUnit
this.proxyHost = opts.proxyHost
this.proxyPort = opts.proxyPort as Integer
this.proxyScheme = opts.proxyScheme
this.proxyUsername = opts.proxyUsername
this.proxyPassword = opts.proxyPassword
this.requesterPays = opts.requesterPays as Boolean
this.s3Acl = parseS3Acl(opts.s3Acl as String)
this.s3PathStyleAccess = opts.s3PathStyleAccess as Boolean
this.socketTimeout = opts.socketTimeout as Integer
this.storageClass = parseStorageClass((opts.storageClass ?: opts.uploadStorageClass) as String) // 'uploadStorageClass' is kept for legacy purposes
// unsupported encryption values are replaced by null (a warning is logged)
this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String)
this.storageKmsKeyId = opts.storageKmsKeyId
this.targetThroughputInGbps = opts.targetThroughputInGbps as Double
this.uploadChunkSize = opts.uploadChunkSize as MemoryUnit
this.uploadMaxAttempts = opts.uploadMaxAttempts as Integer
this.uploadMaxThreads = opts.uploadMaxThreads as Integer
this.uploadRetrySleep = opts.uploadRetrySleep as Duration
// NOTE: the `uploadStorageClass` field is not assigned here, its value is only used as fallback for `storageClass`
// validate the download buffer settings once all fields are assigned
checkDownloadBufferParams()
}
/**
 * Validate the S3 storage class option.
 *
 * @param value The storage class name, may be {@code null}
 * @return The given value when it is {@code null} or one of the supported names,
 *      {@code null} otherwise (a warning is logged). A warning is also logged
 *      for the accepted but deprecated {@code REDUCED_REDUNDANCY} class
 */
private String parseStorageClass(String value) {
    final supported = [null, 'STANDARD', 'STANDARD_IA', 'ONEZONE_IA', 'INTELLIGENT_TIERING', 'REDUCED_REDUNDANCY' ]
    if( !(value in supported) ) {
        log.warn "Unsupported AWS storage-class: $value"
        return null
    }
    if( value == 'REDUCED_REDUNDANCY' )
        log.warn "AWS S3 Storage Class `REDUCED_REDUNDANCY` is deprecated (and more expensive than `STANDARD`). For cost savings, look to `STANDARD_IA`, `ONEZONE_IA`, `INTELLIGENT_TIERING`."
    return value
}
/**
 * Validate the S3 server side encryption option.
 *
 * @param value The encryption name, may be {@code null}
 * @return The given value when it is {@code null}, {@code AES256} or {@code aws:kms};
 *      {@code null} otherwise (a warning is logged)
 */
private String parseStorageEncryption(String value) {
    final supported = value in [null,'AES256','aws:kms']
    if( !supported ) {
        log.warn "Unsupported AWS storage-encryption: $value"
        return null
    }
    return value
}
// ==== getters =====
/**
 * @return The value of the {@code s3PathStyleAccess} option
 */
Boolean getPathStyleAccess() {
    this.s3PathStyleAccess
}
/**
 * Check whether the configured endpoint is a non-AWS one.
 *
 * The check is made on the endpoint host name, therefore an AWS endpoint
 * having a trailing slash, a port or a path (e.g. {@code https://s3.us-east-1.amazonaws.com/})
 * is still recognised as an AWS endpoint. When the host cannot be extracted
 * the whole endpoint string is checked instead.
 *
 * @return {@code true} when an endpoint is set and its host does not end with
 *      {@code .amazonaws.com}, {@code false} otherwise
 */
boolean isCustomEndpoint() {
    if( !endpoint )
        return false
    String host = null
    try {
        host = URI.create(endpoint).getHost()
    }
    catch( IllegalArgumentException e ) {
        // not a valid URI, the raw endpoint string is checked below
        log.debug("Unable to parse S3 endpoint '${endpoint}' - ${e.message}")
    }
    final String target = host ?: endpoint
    return !target.endsWith(".amazonaws.com")
}
/**
 * Look for the region defined in endpoints such as {@code https://xxx.<region>.amazonaws.com}.
 *
 * The candidate is the third-to-last label of the endpoint host name and it is
 * accepted only when it is included in {@code Region.regions()}.
 *
 * @return The region defined in the endpoint. {@code null} when no endpoint or a custom
 *      endpoint is defined, when the host has less than three labels, when the candidate
 *      is not a known region, or when any exception is raised while parsing the endpoint
 */
String getEndpointRegion(){
    if( !endpoint || isCustomEndpoint() )
        return null
    try {
        final hostDomains = URI.create(endpoint).getHost().split('\\.')
        final count = hostDomains.size()
        if( count >= 3 ) {
            final region = hostDomains[count-3]
            if( Region.regions().contains(Region.of(region)) )
                return region
            log.debug("Region '${region}' extracted from endpoint '${endpoint}' is not valid")
            return null
        }
        log.debug("Region subdomain doesn't exist in endpoint '${endpoint}'")
        return null
    }
    catch (Exception e){
        // best-effort lookup: any failure is reported as "no region"
        log.debug("Exception getting region from endpoint: '${endpoint}' - ${e.message}")
        return null
    }
}
/**
 * Export the client options as a map of strings using snake-case keys.
 *
 * Memory values are rendered as a number of bytes and {@code uploadRetrySleep} as
 * a number of milliseconds. Options having no value are not included.
 *
 * @return A new map holding only the options that are set
 */
Map<String,String> getAwsClientConfig() {
    final Map<String,String> settings = new LinkedHashMap<String,String>()
    settings.put('connection_timeout', connectionTimeout?.toString())
    settings.put('max_concurrency', maxConcurrency?.toString())
    settings.put('max_connections', maxConnections?.toString())
    settings.put('max_download_heap_memory', maxDownloadHeapMemory?.toBytes()?.toString())
    settings.put('max_error_retry', maxErrorRetry?.toString())
    settings.put('max_native_memory', maxNativeMemory?.toBytes()?.toString())
    settings.put('minimum_part_size', minimumPartSize?.toBytes()?.toString())
    settings.put('multipart_threshold', multipartThreshold?.toBytes()?.toString())
    settings.put('proxy_host', proxyHost?.toString())
    settings.put('proxy_port', proxyPort?.toString())
    settings.put('proxy_scheme', proxyScheme?.toString())
    settings.put('proxy_username', proxyUsername?.toString())
    settings.put('proxy_password', proxyPassword?.toString())
    settings.put('requester_pays', requesterPays?.toString())
    settings.put('s3_acl', s3Acl?.toString())
    settings.put('socket_timeout', socketTimeout?.toString())
    settings.put('storage_encryption', storageEncryption?.toString())
    settings.put('storage_kms_key_id', storageKmsKeyId?.toString())
    settings.put('target_throughput_in_gbps', targetThroughputInGbps?.toString())
    settings.put('upload_chunk_size', uploadChunkSize?.toBytes()?.toString())
    settings.put('upload_max_attempts', uploadMaxAttempts?.toString())
    settings.put('upload_max_threads', uploadMaxThreads?.toString())
    settings.put('upload_retry_sleep', uploadRetrySleep?.toMillis()?.toString())
    // the storage class is exported with the legacy key name
    settings.put('upload_storage_class', storageClass?.toString())
    return settings.findAll { name, value -> value != null }
}
/**
 * Validate the {@code maxDownloadHeapMemory} and {@code minimumPartSize} options.
 *
 * Nothing is checked when both options are unset. When only one of them is set
 * the other one is replaced by its default constant for the comparison.
 *
 * @throws IllegalArgumentException when one of the two options is zero, or when the
 *      heap buffer is smaller than {@code DEFAULT_INIT_BUFFER_PARTS} times the part size
 */
void checkDownloadBufferParams() {
    final heapSet = maxDownloadHeapMemory != null
    final partSet = minimumPartSize != null
    if( heapSet && maxDownloadHeapMemory.toBytes() == 0L )
        throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` can't be 0")
    if( partSet && minimumPartSize.toBytes() == 0L )
        throw new IllegalArgumentException("Configuration option `aws.client.minimumPartSize` can't be 0")
    if( !heapSet && !partSet )
        return
    long maxBuffer = DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE
    if( maxDownloadHeapMemory )
        maxBuffer = maxDownloadHeapMemory.toBytes()
    long partSize = DEFAULT_PART_SIZE
    if( minimumPartSize )
        partSize = minimumPartSize.toBytes()
    if( maxBuffer < DEFAULT_INIT_BUFFER_PARTS * partSize )
        throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` must be at least " + DEFAULT_INIT_BUFFER_PARTS + " times `aws.client.minimumPartSize`")
}
}

View File

@@ -0,0 +1,74 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.fusion
import groovy.transform.CompileStatic
import nextflow.SysEnv
import nextflow.cloud.aws.config.AwsConfig
import nextflow.fusion.FusionConfig
import nextflow.fusion.FusionEnv
import org.pf4j.Extension
/**
* Implements {@link FusionEnv} for AWS cloud
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@Extension
@CompileStatic
class AwsFusionEnv implements FusionEnv {

    /**
     * Create the environment variables for the given storage scheme.
     *
     * @param scheme The storage scheme; only {@code s3} is handled
     * @param config The Fusion config, used to decide whether credentials are exported
     * @return An empty map when the scheme is not {@code s3}, otherwise a map holding the
     *      credentials, endpoint, region and encryption variables that have a value
     */
    @Override
    Map<String, String> getEnvironment(String scheme, FusionConfig config) {
        if( scheme!='s3' )
            return Collections.<String,String>emptyMap()

        final env = new HashMap<String,String>()
        final awsConfig = AwsConfig.config()
        final s3 = awsConfig.s3Config
        final endpoint = s3.endpoint
        final creds = config.exportStorageCredentials() ? awsCreds(awsConfig) : List.<String>of()
        if( creds ) {
            env.put('AWS_ACCESS_KEY_ID', creds[0])
            env.put('AWS_SECRET_ACCESS_KEY', creds[1])
            // the session token is the optional third element
            if( creds.size() > 2 )
                env.put('AWS_SESSION_TOKEN', creds[2])
        }
        if( endpoint )
            env.put('AWS_S3_ENDPOINT', endpoint)
        // the region is exported only for custom (non-AWS) endpoints
        if( awsConfig.region && s3.isCustomEndpoint() )
            env.put('FUSION_AWS_REGION', awsConfig.region)
        if( s3.storageEncryption )
            env.put('FUSION_AWS_SERVER_SIDE_ENCRYPTION', s3.storageEncryption)
        if( s3.storageKmsKeyId )
            env.put('FUSION_AWS_SSEKMS_KEY_ID', s3.storageKmsKeyId)
        return env
    }

    /**
     * Find the AWS credentials to export.
     *
     * @param awsConfig The AWS config whose credentials take precedence
     * @return The config credentials when available; otherwise the values of the
     *      {@code AWS_ACCESS_KEY_ID} and {@code AWS_SECRET_ACCESS_KEY} variables, followed by
     *      {@code AWS_SESSION_TOKEN} when set; an empty list when key or secret is missing
     */
    protected List<String> awsCreds(AwsConfig awsConfig) {
        final configured = awsConfig.getCredentials()
        if( configured )
            return configured
        final accessKey = SysEnv.get('AWS_ACCESS_KEY_ID')
        final secretKey = SysEnv.get('AWS_SECRET_ACCESS_KEY')
        if( !accessKey || !secretKey )
            return List.<String>of()
        final token = SysEnv.get('AWS_SESSION_TOKEN')
        return token
            ? List.<String>of(accessKey, secretKey, token)
            : List.<String>of(accessKey, secretKey)
    }
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.mail
import javax.mail.internet.MimeMessage
import software.amazon.awssdk.core.SdkBytes
import software.amazon.awssdk.services.ses.SesClient
import software.amazon.awssdk.services.ses.model.RawMessage
import software.amazon.awssdk.services.ses.model.SendRawEmailRequest
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.mail.MailProvider
import nextflow.mail.Mailer
/**
* Send a mime message via AWS SES raw API
*
* https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@CompileStatic
@Slf4j
class AwsMailProvider implements MailProvider {

    /**
     * @return The provider name {@code aws-ses}
     */
    @Override
    String name() {
        return 'aws-ses'
    }

    /**
     * @return Always {@code false}
     */
    @Override
    boolean textOnly() {
        return false
    }

    /**
     * Send the given message using the SES raw email API.
     *
     * The client returned by {@link #getEmailClient()} is closed once the message
     * has been submitted, or when the submission fails, so that the resources held
     * by the client are not leaked on each invocation.
     *
     * @param message The message to send; it is serialised into a memory buffer
     * @param mailer The mailer requesting the delivery (not used by this provider)
     */
    @Override
    void send(MimeMessage message, Mailer mailer) {
        final client = getEmailClient()
        try {
            // dump the message to a buffer
            final outputStream = new ByteArrayOutputStream()
            message.writeTo(outputStream)
            // send the email
            final rawMessage = RawMessage.builder().data(SdkBytes.fromByteArray(outputStream.toByteArray())).build()
            final result = client.sendRawEmail(SendRawEmailRequest.builder().rawMessage(rawMessage).build())
            log.debug "Mail message sent: ${result}"
        }
        finally {
            // NOTE(review): assumes the client is owned by this invocation, which holds for
            // the default `getEmailClient()`; an override returning a shared client must take this into account
            client?.close()
        }
    }

    /**
     * @return A new SES client created with the default builder settings
     */
    SesClient getEmailClient() {
        return SesClient.builder().build()
    }
}

View File

@@ -0,0 +1,644 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.EnumSet;
import java.util.Queue;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.*;
import java.util.function.Consumer;
import java.util.function.Supplier;
import nextflow.cloud.aws.nio.util.ExtendedS3TransferManager;
import nextflow.cloud.aws.nio.util.S3SyncClientConfiguration;
import nextflow.extension.FilesEx;
import nextflow.cloud.aws.AwsClientFactory;
import nextflow.cloud.aws.nio.util.S3AsyncClientConfiguration;
import nextflow.cloud.aws.util.AwsHelper;
import nextflow.util.ThreadPoolManager;
import nextflow.util.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.*;
import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;
import software.amazon.awssdk.transfer.s3.S3TransferManager;
import software.amazon.awssdk.transfer.s3.model.*;
/**
* Client Amazon S3
* @see software.amazon.awssdk.services.s3.S3Client
*/
public class S3Client {
private static final Logger log = LoggerFactory.getLogger(S3Client.class);
private software.amazon.awssdk.services.s3.S3Client client;
// Semaphore to limit concurrent client connections when using virtual threads.
private Semaphore semaphore;
private ObjectCannedACL cannedAcl;
private String kmsKeyId;
private ServerSideEncryption storageEncryption;
private ExtendedS3TransferManager transferManager;
private ExecutorService transferPool;
private Integer transferManagerThreads = 10;
private Boolean isRequesterPaysEnabled = false;
private String callerAccount;
private AwsClientFactory factory;
private Properties props;
private boolean global;
public S3Client(AwsClientFactory factory, Properties props, boolean global) {
S3SyncClientConfiguration clientConfig = S3SyncClientConfiguration.create(props);
this.factory = factory;
this.props = props;
this.global = global;
this.client = factory.getS3Client(clientConfig, global);
this.semaphore = Threads.useVirtual() ? new Semaphore(clientConfig.getMaxConnections()) : null;
this.callerAccount = fetchCallerAccount();
}
/**
 * Run the given action holding a permit of the connections semaphore.
 * When no semaphore is defined the action is invoked directly.
 *
 * @param action The operation to execute
 * @return The value returned by the action
 * @throws RuntimeException when the thread is interrupted while waiting for the permit;
 *      the thread interrupt status is restored before throwing
 */
private <T> T runWithPermit(Supplier<T> action) {
    if (semaphore == null) {
        return action.get();
    }
    try {
        semaphore.acquire();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException("Interrupted while acquiring S3 client semaphore", e);
    }
    // the permit is held: make sure it is always given back
    try {
        return action.get();
    } finally {
        semaphore.release();
    }
}
/**
 * AmazonS3Client#getS3AccountOwner() is not available in SDK v2.
 * The STSClient#getCallerIdentity returns the account, but it does not include the canonical ID required for ACLs.
 *
 * This method and fetchCallerAccount() emulate the old behavior. The canonical ID
 * is read from the owner of a bucket ACL, therefore it can only be retrieved when
 * the bucket listing returns at least one bucket.
 *
 * @return The owner ID fetched when this object was created, or {@code null} when it could not be retrieved
 */
public String getCallerAccount() {
    return this.callerAccount;
}
/**
 * Retrieve the owner ID from the ACL of the first bucket returned by the bucket listing.
 *
 * This is a best-effort lookup: any failure is logged at debug level and reported as {@code null}.
 *
 * @return The owner ID, or {@code null} when no bucket is returned or the lookup fails
 */
private String fetchCallerAccount() {
    try {
        final List<Bucket> owned = runWithPermit(() -> client.listBuckets(ListBucketsRequest.builder().maxBuckets(1).build()).buckets());
        if (owned != null && !owned.isEmpty()) {
            final String firstBucket = owned.get(0).name();
            return getBucketAcl(firstBucket).owner().id();
        }
        return null;
    } catch (Throwable e) {
        log.debug("Unable to fetch caller account - {} ", e.getMessage());
        return null;
    }
}
/**
 * List the buckets returned by the underlying client.
 *
 * @return The list of buckets
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#listBuckets()
 */
public List<Bucket> listBuckets() throws IOException {
    final List<Bucket> buckets;
    try {
        buckets = runWithPermit(() -> client.listBuckets().buckets());
    } catch (SdkException e) {
        throw convertAwsException(e, "listBuckets", null, null);
    }
    return buckets;
}
/**
 * List the objects matching the given request.
 *
 * @param request The listing request
 * @return The listing response
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#listObjects(ListObjectsRequest)
 */
public ListObjectsResponse listObjects(ListObjectsRequest request) throws IOException {
    try {
        return runWithPermit(() -> client.listObjects(request));
    } catch (SdkException e) {
        // report the real operation name ("listObjects", it was "listObject") in the error message
        throw convertAwsException(e, "listObjects", request.bucket(), request.prefix());
    }
}
/**
 * Convert an AWS SDK exception into the most appropriate {@link IOException} subtype
 * so callers can handle it via standard NIO semantics:
 * <ul>
 *   <li>missing bucket, missing key or status 404: {@link NoSuchFileException}</li>
 *   <li>status 401 or 403: {@link AccessDeniedException}</li>
 *   <li>anything else: a plain {@link IOException} reporting the method and the S3 path</li>
 * </ul>
 *
 * The original {@code SdkException} is always attached as the cause for diagnostics.
 *
 * @param e The SDK exception to convert
 * @param method The name of the failed operation, used in the generic error message
 * @param bucket The bucket name, may be {@code null}
 * @param key The object key, may be {@code null} or empty
 * @return The converted exception
 */
// package-private for testing
static IOException convertAwsException(SdkException e, String method, String bucket, String key) {
    final String s3path;
    if (key != null && !key.isEmpty()) {
        s3path = "s3://" + bucket + "/" + key;
    } else {
        s3path = "s3://" + (bucket != null ? bucket : "");
    }

    // -1 means the exception carries no HTTP status
    final int status = e instanceof AwsServiceException
            ? ((AwsServiceException) e).statusCode()
            : -1;

    final boolean notFound = e instanceof NoSuchBucketException
            || e instanceof NoSuchKeyException
            || status == 404;
    if (notFound) {
        final NoSuchFileException missing = new NoSuchFileException(s3path);
        missing.initCause(e);
        return missing;
    }

    if (status == 401 || status == 403) {
        final AccessDeniedException denied = new AccessDeniedException(s3path, null, e.getMessage());
        denied.initCause(e);
        return denied;
    }

    return new IOException(String.format("Exception calling %s for %s", method, s3path), e);
}
/**
 * Open the content of an object.
 *
 * The request is flagged as paid by the requester when the requester-pays option is enabled.
 * The connection permit is released as soon as the SDK call returns, not when the stream is closed.
 *
 * @param bucketName The bucket name
 * @param key The object key
 * @return The stream over the object content
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#getObject
 */
public ResponseInputStream<GetObjectResponse> getObject(String bucketName, String key) throws IOException {
    final GetObjectRequest.Builder request = GetObjectRequest.builder();
    request.bucket(bucketName);
    request.key(key);
    if (this.isRequesterPaysEnabled) {
        request.requestPayer(RequestPayer.REQUESTER);
    }
    try {
        return runWithPermit(() -> client.getObject(request.build()));
    } catch (SdkException e) {
        throw convertAwsException(e, "getObject", bucketName, key);
    }
}
/**
 * Upload a local file as an object. The canned ACL is applied when defined.
 *
 * @param bucket The target bucket name
 * @param key The target object key
 * @param file The local file to upload
 * @return The SDK response
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#putObject
 */
public PutObjectResponse putObject(String bucket, String key, File file) throws IOException {
    final PutObjectRequest.Builder request = PutObjectRequest.builder();
    request.bucket(bucket);
    request.key(key);
    if (cannedAcl != null) {
        log.trace("Setting canned ACL={}; bucket={}; key={}", cannedAcl, bucket, key);
        request.acl(cannedAcl);
    }
    try {
        return runWithPermit(() -> client.putObject(request.build(), file.toPath()));
    } catch (SdkException e) {
        throw convertAwsException(e, "putObject", bucket, key);
    }
}
/**
 * Complete and build a put-object request.
 *
 * The canned ACL, KMS key and encryption settings of this client are applied when
 * defined, along with the given tags, content type and storage class when provided.
 *
 * @param reqBuilder The request builder, already holding bucket and key
 * @param tags The object tags, ignored when {@code null} or empty
 * @param contentType The object content type, ignored when {@code null}
 * @param storageClass The storage class, ignored when {@code null}
 * @return The request built
 */
private PutObjectRequest preparePutObjectRequest(PutObjectRequest.Builder reqBuilder, List<Tag> tags, String contentType, String storageClass) {
    if (cannedAcl != null)
        reqBuilder.acl(cannedAcl);

    final boolean hasTags = tags != null && !tags.isEmpty();
    if (hasTags)
        reqBuilder.tagging(Tagging.builder().tagSet(tags).build());

    if (kmsKeyId != null)
        reqBuilder.ssekmsKeyId(kmsKeyId);

    if (storageEncryption != null)
        reqBuilder.serverSideEncryption(storageEncryption);

    if (contentType != null)
        reqBuilder.contentType(contentType);

    if (storageClass != null)
        reqBuilder.storageClass(storageClass);

    return reqBuilder.build();
}
/**
 * Upload the content of a stream as an object.
 *
 * The canned ACL, KMS key and encryption settings of this client are applied when
 * defined, along with the given tags and content type when provided.
 *
 * @param bucket The target bucket name
 * @param keyName The target object key
 * @param inputStream The stream providing the object content
 * @param tags The object tags, ignored when {@code null} or empty
 * @param contentType The object content type, ignored when {@code null}
 * @param contentLength The number of bytes to read from the stream
 * @return The SDK response
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#putObject
 */
public PutObjectResponse putObject(String bucket, String keyName, InputStream inputStream, List<Tag> tags, String contentType, long contentLength) throws IOException {
    final PutObjectRequest.Builder reqBuilder = PutObjectRequest.builder().bucket(bucket).key(keyName);
    // same settings as the other uploads, except the storage class that is not set here
    final PutObjectRequest req = preparePutObjectRequest(reqBuilder, tags, contentType, (String) null);
    if (log.isTraceEnabled()) {
        log.trace("S3 PutObject request {}", req);
    }
    try {
        return runWithPermit(() -> client.putObject(req, RequestBody.fromInputStream(inputStream, contentLength)));
    } catch (SdkException e) {
        throw convertAwsException(e, "putObject", bucket, keyName);
    }
}
/**
 * Delete an object.
 *
 * @param bucket The bucket name
 * @param key The object key
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#deleteObject
 */
public void deleteObject(String bucket, String key) throws IOException {
    try {
        final DeleteObjectRequest request = DeleteObjectRequest.builder()
                .bucket(bucket)
                .key(key)
                .build();
        runWithPermit(() -> client.deleteObject(request));
    } catch (SdkException e) {
        throw convertAwsException(e, "deleteObject", bucket, key);
    }
}
/**
 * Retrieve the access control policy of a bucket.
 *
 * @param bucket The bucket name
 * @return A policy holding the grants and the owner returned by the SDK
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#getBucketAcl
 */
public AccessControlPolicy getBucketAcl(String bucket) throws IOException {
    try {
        final GetBucketAclRequest request = GetBucketAclRequest.builder().bucket(bucket).build();
        final GetBucketAclResponse acl = runWithPermit(() -> client.getBucketAcl(request));
        return AccessControlPolicy.builder()
                .grants(acl.grants())
                .owner(acl.owner())
                .build();
    } catch (SdkException e) {
        throw convertAwsException(e, "getBucketAcl", bucket, null);
    }
}
/**
 * Define the canned ACL applied to uploaded objects.
 *
 * @param acl The ACL name parsed by {@code AwsHelper.parseS3Acl}; a {@code null} value is ignored
 */
public void setCannedAcl(String acl) {
    if (acl != null) {
        this.cannedAcl = AwsHelper.parseS3Acl(acl);
        log.debug("Setting S3 canned ACL={} [{}]", this.cannedAcl, acl);
    }
}
/**
 * Define the KMS key applied to uploaded objects.
 *
 * @param kmsKeyId The KMS key ID; a {@code null} value is ignored
 */
public void setKmsKeyId(String kmsKeyId) {
    if (kmsKeyId != null) {
        this.kmsKeyId = kmsKeyId;
        log.debug("Setting S3 SSE kms Id={}", kmsKeyId);
    }
}
/**
 * Define the server side encryption applied to uploaded objects.
 *
 * @param alg The algorithm name converted with {@code ServerSideEncryption.fromValue};
 *      a {@code null} value is ignored
 */
public void setStorageEncryption(String alg) {
    if (alg != null) {
        this.storageEncryption = ServerSideEncryption.fromValue(alg);
        log.debug("Setting S3 SSE storage encryption algorithm={}", alg);
    }
}
/**
 * Enable or disable the requester-pays option.
 *
 * @param requesterPaysEnabled The flag as a string: {@code "true"} (ignoring case) enables the
 *      option, any other non-null value disables it; a {@code null} value is ignored
 */
public void setRequesterPaysEnabled(String requesterPaysEnabled) {
    if (requesterPaysEnabled != null) {
        this.isRequesterPaysEnabled = Boolean.parseBoolean(requesterPaysEnabled);
        log.debug("Setting S3 requester pays enabled={}", isRequesterPaysEnabled);
    }
}
/**
 * @return The canned ACL applied to uploaded objects, or {@code null} when not defined
 */
public ObjectCannedACL getCannedAcl() {
    return this.cannedAcl;
}
/**
 * @return The underlying SDK client; calls made directly on it do not use the connections semaphore
 */
public software.amazon.awssdk.services.s3.S3Client getClient() {
    return this.client;
}
/**
 * Retrieve the access control policy of an object.
 *
 * @param bucketName The bucket name
 * @param key The object key
 * @return A policy holding the grants and the owner returned by the SDK
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#getObjectAcl
 */
public AccessControlPolicy getObjectAcl(String bucketName, String key) throws IOException {
    try {
        final GetObjectAclRequest request = GetObjectAclRequest.builder().bucket(bucketName).key(key).build();
        final GetObjectAclResponse acl = runWithPermit(() -> client.getObjectAcl(request));
        return AccessControlPolicy.builder()
                .grants(acl.grants())
                .owner(acl.owner())
                .build();
    } catch (SdkException e) {
        throw convertAwsException(e, "getObjectAcl", bucketName, key);
    }
}
/**
 * Retrieve the metadata of an object with a head request.
 *
 * @param bucketName The bucket name
 * @param key The object key
 * @return The SDK head-object response
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#headObject
 */
public HeadObjectResponse getObjectMetadata(String bucketName, String key) throws IOException {
    try {
        final HeadObjectRequest request = HeadObjectRequest.builder()
                .bucket(bucketName)
                .key(key)
                .build();
        return runWithPermit(() -> client.headObject(request));
    } catch (SdkException e) {
        throw convertAwsException(e, "getObjectMetadata", bucketName, key);
    }
}
/**
 * Retrieve the metadata of a bucket with a head request.
 *
 * @param bucketName The bucket name
 * @return The SDK head-bucket response
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#headBucket
 */
public HeadBucketResponse getBucketMetadata(String bucketName) throws IOException {
    try {
        final HeadBucketRequest request = HeadBucketRequest.builder().bucket(bucketName).build();
        return runWithPermit(() -> client.headBucket(request));
    } catch (SdkException e) {
        throw convertAwsException(e, "getBucketMetadata", bucketName, null);
    }
}
/**
 * Retrieve the tags of an object.
 *
 * @param bucketName The bucket name
 * @param key The object key
 * @return The tag set returned by the SDK
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 */
public List<Tag> getObjectTags(String bucketName, String key) throws IOException {
    try {
        final GetObjectTaggingRequest request = GetObjectTaggingRequest.builder()
                .bucket(bucketName)
                .key(key)
                .build();
        return runWithPermit(() -> client.getObjectTagging(request).tagSet());
    } catch (SdkException e) {
        throw convertAwsException(e, "getObjectTags", bucketName, key);
    }
}
/**
 * Retrieve the KMS key ID reported by the metadata of an object.
 *
 * @param bucketName The bucket name
 * @param key The object key
 * @return The {@code ssekmsKeyId} value of the head-object response
 * @throws IOException when the metadata cannot be retrieved
 */
public String getObjectKmsKeyId(String bucketName, String key) throws IOException {
    final HeadObjectResponse metadata = getObjectMetadata(bucketName, key);
    return metadata.ssekmsKeyId();
}
/**
 * Create the paginator listing the objects matching the given request.
 *
 * NOTE(review): according to the SDK documentation the paginator performs the service
 * calls lazily while it is iterated; if so, those calls run outside the permit and the
 * exception conversion applied here - confirm.
 *
 * @param request The listing request
 * @return The paginator returned by the SDK
 * @throws IOException when the SDK call fails, see {@code convertAwsException}
 * @see software.amazon.awssdk.services.s3.S3Client#listObjectsV2Paginator
 */
public ListObjectsV2Iterable listObjectsV2Paginator(ListObjectsV2Request request) throws IOException {
    final ListObjectsV2Iterable pages;
    try {
        pages = runWithPermit(() -> client.listObjectsV2Paginator(request));
    } catch (SdkException e) {
        throw convertAwsException(e, "listObjects", request.bucket(), request.prefix());
    }
    return pages;
}
// ===== transfer manager section =====
/**
 * Get the transfer manager, creating it (along with its thread pool) on the first invocation.
 * The method is synchronized on this object.
 *
 * @return The transfer manager instance shared by this client
 */
synchronized ExtendedS3TransferManager transferManager() {
    if (transferManager != null) {
        return transferManager;
    }
    transferPool = ThreadPoolManager.create("S3TransferManager");
    final S3TransferManager delegate = S3TransferManager.builder()
            .s3Client(factory.getS3AsyncClient(S3AsyncClientConfiguration.create(props), global))
            .executor(transferPool)
            .build();
    transferManager = new ExtendedS3TransferManager(delegate, props);
    return transferManager;
}
/**
 * Download an object to a local file using the transfer manager and wait for its completion.
 *
 * The request is flagged as paid by the requester when the requester-pays option is
 * enabled, consistently with {@code getObject}.
 *
 * @param source The S3 path of the object to download
 * @param target The local destination file
 * @param size The object size, passed as-is to the transfer manager
 * @throws InterruptedIOException when the thread is interrupted while waiting; the thread
 *      interrupt status is restored before throwing
 * @throws IOException when the download fails; the cause is the exception raised by the transfer
 */
public void downloadFile(S3Path source, File target, long size) throws IOException {
    final String bucket = source.getBucket();
    final String key = source.getKey();
    try {
        DownloadFileRequest downloadFileRequest = DownloadFileRequest.builder()
                .getObjectRequest(b -> {
                    b.bucket(bucket).key(key);
                    // requester-pays buckets reject requests missing the payer flag
                    if (isRequesterPaysEnabled)
                        b.requestPayer(RequestPayer.REQUESTER);
                })
                .destination(target)
                .build();
        transferManager().downloadFile(downloadFileRequest,size).completionFuture().get();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        final InterruptedIOException interrupted = new InterruptedIOException(String.format("S3 download file: s3://%s/%s cancelled", bucket, key));
        // keep the original exception for diagnostics
        interrupted.initCause(e);
        throw interrupted;
    } catch (ExecutionException e) {
        String msg = String.format("Exception thrown downloading S3 object s3://%s/%s", bucket, key);
        throw new IOException(msg, e.getCause());
    }
}
/**
 * Creates a directory, treating "already exists" as success.
 *
 * @param dir the directory to create
 * @throws IOException for any failure other than {@link FileAlreadyExistsException}
 */
private static void createDirectory(Path dir) throws IOException {
    try {
        Files.createDirectory(dir);
    } catch (FileAlreadyExistsException alreadyThere) {
        // deliberately tolerated: the target may have been created by a previous visit
        if (log.isTraceEnabled()) {
            log.trace("File already exists: " + dir);
        }
    }
}
/**
 * Downloads an S3 "directory" into a local directory.
 *
 * <p>The source tree is walked with {@link Files#walkFileTree}; each directory is created
 * locally and each file is submitted to the transfer manager. Once the walk is over, the
 * method waits for all submitted downloads via {@code cleanupQueuedDownloads}.
 *
 * <p>If the walk itself fails, that failure is the exception reported to the caller; any
 * failure raised while waiting for the already-submitted downloads is attached to it as a
 * suppressed exception instead of replacing it.
 *
 * @param source the S3 path to download
 * @param targetFile the local directory receiving the content
 * @throws InterruptedIOException when the thread is interrupted while submitting or awaiting downloads
 * @throws IOException when the traversal or any transfer fails
 */
public void downloadDirectory(S3Path source, File targetFile) throws IOException {
    //
    // the download directory method provided by the TransferManager replicates
    // the source files directory structure in the target path
    // see https://github.com/aws/aws-sdk-java/issues/1321
    //
    // so just traverse the source path and copy all files one by one
    //
    final Path target = targetFile.toPath();
    final Queue<OngoingFileDownload> allDownloads = new LinkedList<>();
    // single-element array so the anonymous visitor can record an interruption
    final InterruptedIOException[] traversalInterruption = {null};
    FileVisitor<Path> visitor = new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult preVisitDirectory(Path current, BasicFileAttributes attr) throws IOException {
            // get the *delta* path against the source path
            final Path rel = source.relativize(current);
            final String delta = rel != null ? rel.toString() : null;
            final Path newFolder = delta != null ? target.resolve(delta) : target;
            if(log.isTraceEnabled())
                log.trace("Download DIR: " + current + " -> " + newFolder);
            // only the folder is created here; the contained files are handled by visitFile
            createDirectory(newFolder);
            return FileVisitResult.CONTINUE;
        }
        @Override
        public FileVisitResult visitFile(Path current, BasicFileAttributes attr) throws IOException {
            // get the *delta* path against the source path
            final Path rel = source.relativize(current);
            final String delta = rel != null ? rel.toString() : null;
            final Path newFile = delta != null ? target.resolve(delta) : target;
            if( log.isTraceEnabled())
                log.trace("Download file: " + current + " -> "+ FilesEx.toUriString(newFile));
            try {
                S3Path s3Path = (S3Path)current;
                DownloadFileRequest downloadFileRequest = DownloadFileRequest.builder()
                        .getObjectRequest(b -> b.bucket(s3Path.getBucket()).key(s3Path.getKey()))
                        .destination(newFile)
                        .build();
                FileDownload it = transferManager().downloadFile(downloadFileRequest, attr.size());
                allDownloads.add(new OngoingFileDownload(s3Path.getBucket(), s3Path.getKey(), it));
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                // Don't throw immediately - store the exception and continue to clean-up
                traversalInterruption[0] = new InterruptedIOException(String.format("S3 download directory: s3://%s/%s interrupted", source.getBucket(), source.getKey()));
                return FileVisitResult.TERMINATE;
            }
            return FileVisitResult.CONTINUE;
        }
    };
    Throwable walkFailure = null;
    try {
        Files.walkFileTree(source, EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, visitor);
    } catch (IOException | RuntimeException e) {
        // remember the primary failure so the clean-up below cannot replace it
        walkFailure = e;
        throw e;
    } finally {
        try {
            cleanupQueuedDownloads(allDownloads, traversalInterruption[0], source);
        } catch (IOException cleanupFailure) {
            if (walkFailure == null) {
                throw cleanupFailure;
            }
            // an exception thrown from finally would discard the traversal failure
            walkFailure.addSuppressed(cleanupFailure);
        }
    }
}
/**
 * Waits for every queued download to complete and reports the outcome as a single exception.
 *
 * <p>Precedence of the reported failure:
 * <ol>
 *   <li>an interruption while waiting here: {@link InterruptedIOException}, with
 *       {@code traversalInterruption} (if any) attached as suppressed;</li>
 *   <li>{@code traversalInterruption}, with the collected transfer failure (if any) attached as suppressed;</li>
 *   <li>the collected transfer failures, wrapped in an {@link IOException}.</li>
 * </ol>
 *
 * @param allDownloads the downloads submitted during the traversal; the queue is drained by this method
 * @param traversalInterruption the interruption recorded during the traversal, or {@code null}
 * @param source the S3 path being downloaded, used only for error messages
 * @throws IOException as described above
 */
private void cleanupQueuedDownloads(Queue<OngoingFileDownload> allDownloads, InterruptedIOException traversalInterruption, S3Path source) throws IOException {
try {
// first transfer failure; causes of later failures are attached to it as suppressed
IOException firstException = null;
while(!allDownloads.isEmpty()) {
OngoingFileDownload current = allDownloads.poll();
try {
// block until this transfer has finished
current.download.completionFuture().get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
log.debug("Exception thrown downloading S3 object s3://{}/{}", current.bucket, current.key, cause);
if (firstException == null) {
firstException = new IOException(String.format("Transfer failed for s3://%s/%s", current.bucket, current.key), cause);
} else {
firstException.addSuppressed(cause);
}
}
}
// Throw traversal interruption first if it occurred
if (traversalInterruption != null) {
if (firstException != null) {
traversalInterruption.addSuppressed(firstException);
}
throw traversalInterruption;
}
// Throw download failures if any occurred
if (firstException != null) {
throw new IOException(String.format("Some transfers from S3 download directory: s3://%s/%s failed", source.getBucket(), source.getKey()), firstException);
}
}
catch (InterruptedException e) {
// Interrupted while waiting: the loop is abandoned, so downloads still in the queue are
// not awaited, and any transfer failure collected so far is not reported.
Thread.currentThread().interrupt();
InterruptedIOException interruptedException = new InterruptedIOException(String.format("Interrupted while download directory s3://%s/%s", source.getBucket(), source.getKey()));
if (traversalInterruption != null) {
interruptedException.addSuppressed(traversalInterruption);
}
throw interruptedException;
}
}
/**
 * Uploads a local file to S3 and blocks until the transfer completes.
 *
 * <p>The put request is completed by {@code preparePutObjectRequest} using the tags,
 * content type and storage class carried by the target path.
 *
 * @param source the local file to upload
 * @param target the destination S3 path
 * @throws InterruptedIOException when the calling thread is interrupted while waiting (the interrupt flag is restored)
 * @throws IOException when the transfer fails
 */
public void uploadFile(File source, S3Path target) throws IOException {
    final var putRequest = PutObjectRequest.builder().bucket(target.getBucket()).key(target.getKey());
    preparePutObjectRequest(putRequest, target.getTagsList(), target.getContentType(), target.getStorageClass());
    final var transferRequest = UploadFileRequest.builder()
            .putObjectRequest(putRequest.build())
            .source(source)
            .build();
    try {
        transferManager().uploadFile(transferRequest).completionFuture().get();
    } catch (ExecutionException e) {
        final String msg = String.format("Exception thrown uploading S3 object s3://%s/%s", target.getBucket(), target.getKey());
        throw new IOException(msg, e.getCause());
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new InterruptedIOException(String.format("S3 upload file: s3://%s/%s interrupted", target.getBucket(), target.getKey()));
    }
}
/**
 * Builds the per-file transformer used by directory uploads: it rewrites the put request of
 * each upload through {@code updateBuilder}.
 *
 * @param tags the tags to apply to every uploaded object, may be {@code null} or empty
 * @return a consumer that replaces the put request on the given upload request builder
 */
private Consumer<UploadFileRequest.Builder> transformUploadRequest(List<Tag> tags) {
    return uploadBuilder -> {
        final PutObjectRequest.Builder putBuilder = uploadBuilder.build().putObjectRequest().toBuilder();
        uploadBuilder.putObjectRequest(updateBuilder(putBuilder, tags).build());
    };
}
/**
 * Applies the client-level ACL, encryption and KMS key settings, plus the given tags,
 * to a put request builder. Settings that are {@code null} (or an empty tag list) are skipped.
 *
 * @param porBuilder the builder to update
 * @param tags the tags to set, may be {@code null} or empty
 * @return the same builder instance
 */
private PutObjectRequest.Builder updateBuilder(PutObjectRequest.Builder porBuilder, List<Tag> tags) {
    if( cannedAcl != null ) {
        porBuilder.acl(cannedAcl);
    }
    if( storageEncryption != null ) {
        porBuilder.serverSideEncryption(storageEncryption);
    }
    if( kmsKeyId != null ) {
        porBuilder.ssekmsKeyId(kmsKeyId);
    }
    final boolean hasTags = tags != null && !tags.isEmpty();
    if( hasTags ) {
        porBuilder.tagging(Tagging.builder().tagSet(tags).build());
    }
    return porBuilder;
}
/**
 * Uploads a local directory to S3 and blocks until the transfer completes.
 *
 * @param source the local directory to upload
 * @param target the destination S3 path; its bucket, key (used as prefix) and tags are applied
 * @throws InterruptedIOException when the calling thread is interrupted while waiting (the interrupt flag is restored)
 * @throws IOException when the transfer fails as a whole or reports any failed file transfer
 */
public void uploadDirectory(File source, S3Path target) throws IOException {
    final UploadDirectoryRequest directoryRequest = UploadDirectoryRequest.builder()
            .bucket(target.getBucket())
            .s3Prefix(target.getKey())
            .source(source.toPath())
            .uploadFileRequestTransformer(transformUploadRequest(target.getTagsList()))
            .build();
    try {
        final CompletedDirectoryUpload completed = transferManager().uploadDirectory(directoryRequest).completionFuture().get();
        final var failures = completed.failedTransfers();
        if (failures.isEmpty()) {
            return;
        }
        // the directory upload finished, but one or more files could not be transferred
        log.debug("S3 upload directory: s3://{}/{} failed transfers", target.getBucket(), target.getKey());
        throw new IOException("Some transfers in S3 upload directory: s3://"+ target.getBucket() +"/"+ target.getKey() +" has failed - Transfers: " + failures );
    } catch (ExecutionException e) {
        final String msg = String.format("Exception thrown uploading S3 object s3://%s/%s", target.getBucket(), target.getKey());
        throw new IOException(msg, e.getCause());
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new InterruptedIOException(String.format("S3 upload directory: s3://%s/%s interrupted", target.getBucket(), target.getKey()));
    }
}
/**
 * Completes an S3 copy request with the client-level settings and the given per-object
 * attributes, then runs it through the transfer manager and waits for completion.
 *
 * <p>Non-empty tags switch the tagging directive to {@code REPLACE}; a non-null content type
 * switches the metadata directive to {@code REPLACE}.
 *
 * @param reqBuilder builder already carrying the source and destination of the copy; it is mutated
 * @param tags tags to set on the destination, may be {@code null} or empty
 * @param contentType content type to set on the destination, may be {@code null}
 * @param storageClass storage class to set on the destination, may be {@code null}
 * @throws InterruptedIOException when the calling thread is interrupted while waiting (the interrupt flag is restored)
 * @throws IOException when the copy fails
 */
public void copyFile(CopyObjectRequest.Builder reqBuilder, List<Tag> tags, String contentType, String storageClass) throws IOException {
    if( tags !=null && !tags.isEmpty()) {
        log.debug("Setting tags: {}", tags);
        reqBuilder.taggingDirective(TaggingDirective.REPLACE);
        reqBuilder.tagging(Tagging.builder().tagSet(tags).build());
    }
    if( cannedAcl != null ) {
        reqBuilder.acl(cannedAcl);
    }
    if( storageEncryption != null ) {
        reqBuilder.serverSideEncryption(storageEncryption);
    }
    if( kmsKeyId !=null ) {
        reqBuilder.ssekmsKeyId(kmsKeyId);
    }
    if( contentType!=null ) {
        reqBuilder.metadataDirective(MetadataDirective.REPLACE);
        reqBuilder.contentType(contentType);
    }
    if( storageClass!=null ) {
        reqBuilder.storageClass(storageClass);
    }
    CopyObjectRequest req = reqBuilder.build();
    if( log.isTraceEnabled() ) {
        log.trace("S3 CopyObject request {}", req);
    }
    CopyRequest copyRequest = CopyRequest.builder().copyObjectRequest(req).build();
    try {
        transferManager().copy(copyRequest).completionFuture().get();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new InterruptedIOException(String.format("S3 copy s3://%s/%s to s3://%s/%s interrupted", req.sourceBucket(), req.sourceKey(), req.destinationBucket(), req.destinationKey()));
    } catch (ExecutionException e) {
        // message previously read "form" instead of "from"
        String msg = String.format("Exception thrown copying S3 object from s3://%s/%s to s3://%s/%s", req.sourceBucket(), req.sourceKey(), req.destinationBucket(), req.destinationKey());
        throw new IOException(msg, e.getCause());
    }
}
/**
 * Immutable holder pairing an in-flight download with the bucket and key it was started for,
 * so that failures can be reported against the right object.
 */
static class OngoingFileDownload {
    /** Bucket of the object being downloaded. */
    final String bucket;
    /** Key of the object being downloaded. */
    final String key;
    /** Handle of the transfer submitted to the transfer manager. */
    final FileDownload download;

    public OngoingFileDownload(String bucket, String key, FileDownload download) {
        this.bucket = bucket;
        this.key = key;
        this.download = download;
    }
}
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import static java.lang.String.format;
/**
 * Immutable {@link BasicFileAttributes} snapshot for an S3 path.
 *
 * <p>Only the last-modified time is stored; it is also returned as the last-access and
 * creation time. Symbolic links and "other" file types are never reported.
 */
public class S3FileAttributes implements BasicFileAttributes {

    private final String key;
    private final FileTime lastModifiedTime;
    private final long size;
    private final boolean directory;
    private final boolean regularFile;

    /**
     * @param key the value returned by {@link #fileKey()}
     * @param lastModifiedTime the timestamp returned by all three time accessors
     * @param size the value returned by {@link #size()}
     * @param isDirectory the value returned by {@link #isDirectory()}
     * @param isRegularFile the value returned by {@link #isRegularFile()}
     */
    public S3FileAttributes(String key, FileTime lastModifiedTime, long size,
                            boolean isDirectory, boolean isRegularFile) {
        this.key = key;
        this.lastModifiedTime = lastModifiedTime;
        this.size = size;
        this.directory = isDirectory;
        this.regularFile = isRegularFile;
    }

    @Override
    public FileTime lastModifiedTime() {
        return this.lastModifiedTime;
    }

    /** Same value as {@link #lastModifiedTime()}. */
    @Override
    public FileTime lastAccessTime() {
        return this.lastModifiedTime;
    }

    /** Same value as {@link #lastModifiedTime()}. */
    @Override
    public FileTime creationTime() {
        return this.lastModifiedTime;
    }

    @Override
    public boolean isRegularFile() {
        return this.regularFile;
    }

    @Override
    public boolean isDirectory() {
        return this.directory;
    }

    /** Always {@code false}. */
    @Override
    public boolean isSymbolicLink() {
        return false;
    }

    /** Always {@code false}. */
    @Override
    public boolean isOther() {
        return false;
    }

    @Override
    public long size() {
        return this.size;
    }

    /** Returns the key given at construction time. */
    @Override
    public Object fileKey() {
        return this.key;
    }

    @Override
    public String toString() {
        final String template = "[%s: lastModified=%s, size=%s, isDirectory=%s, isRegularFile=%s]";
        return format(template, key, lastModifiedTime, size, directory, regularFile);
    }
}

View File

@@ -0,0 +1,60 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.IOException;
import java.nio.file.attribute.BasicFileAttributeView;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
/**
 * Implements {@link BasicFileAttributeView} for S3 file storage by exposing a
 * pre-computed {@link S3FileAttributes} instance.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
public class S3FileAttributesView implements BasicFileAttributeView {

    private S3FileAttributes target;

    /**
     * @param target the attributes returned by {@link #readAttributes()}
     */
    S3FileAttributesView(S3FileAttributes target) {
        this.target = target;
    }

    /** The name of this view, {@code "basic"}. */
    @Override
    public String name() {
        return "basic";
    }

    /** Returns the attributes given at construction time. */
    @Override
    public BasicFileAttributes readAttributes() throws IOException {
        return this.target;
    }

    /**
     * Changing timestamps is not supported. Instead of throwing an exception this method
     * does nothing, so as not to break
     * {@code java.nio.file.CopyMoveHelper#copyToForeignTarget(java.nio.file.Path, java.nio.file.Path, java.nio.file.CopyOption...)}.
     *
     * @param lastModifiedTime ignored
     * @param lastAccessTime ignored
     * @param createTime ignored
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, FileTime createTime) throws IOException {
        // intentionally a no-op: not supported
    }
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.WatchService;
import java.nio.file.attribute.UserPrincipalLookupService;
import java.nio.file.spi.FileSystemProvider;
import java.util.Properties;
import java.util.Set;
import software.amazon.awssdk.services.s3.model.Bucket;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
/**
 * {@link FileSystem} implementation backed by an {@link S3Client}.
 *
 * <p>The instance is identified by the bucket name extracted from its URI; it counts as open
 * for as long as the provider's {@code fileSystems} map holds an entry for that bucket name.
 */
public class S3FileSystem extends FileSystem {

    private final S3FileSystemProvider provider;
    private final S3Client client;
    private final String endpoint;
    private final String bucketName;
    private final Properties properties;

    /**
     * @param provider the provider that owns this file system
     * @param client the client used for S3 operations
     * @param uri the file system URI; its host becomes the endpoint and its bucket name the identity
     * @param props the configuration properties exposed by {@link #properties()}
     */
    public S3FileSystem(S3FileSystemProvider provider, S3Client client, URI uri, Properties props) {
        this.provider = provider;
        this.client = client;
        this.properties = props;
        this.endpoint = uri.getHost();
        this.bucketName = S3Path.bucketName(uri);
    }

    @Override
    public FileSystemProvider provider() {
        return this.provider;
    }

    /** Returns the properties given at construction time. */
    public Properties properties() {
        return this.properties;
    }

    /** Closes this file system by removing its bucket entry from the provider. */
    @Override
    public void close() {
        provider.fileSystems.remove(bucketName);
    }

    /** Returns {@code true} while the provider holds an entry for this bucket name. */
    @Override
    public boolean isOpen() {
        return provider.fileSystems.containsKey(bucketName);
    }

    @Override
    public boolean isReadOnly() {
        return false;
    }

    @Override
    public String getSeparator() {
        return S3Path.PATH_SEPARATOR;
    }

    /**
     * Lists one root path per bucket returned by the client.
     *
     * @throws UncheckedIOException when listing the buckets fails
     */
    @Override
    public Iterable<Path> getRootDirectories() {
        final ImmutableList.Builder<Path> roots = ImmutableList.builder();
        try {
            for (Bucket each : client.listBuckets()) {
                roots.add(new S3Path(this, each.name()));
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        return roots.build();
    }

    /** No file stores are exposed. */
    @Override
    public Iterable<FileStore> getFileStores() {
        return ImmutableList.of();
    }

    /** Only the {@code "basic"} attribute view is supported. */
    @Override
    public Set<String> supportedFileAttributeViews() {
        return ImmutableSet.of("basic");
    }

    @Override
    public Path getPath(String first, String... more) {
        return more.length == 0
                ? new S3Path(this, first)
                : new S3Path(this, first, more);
    }

    /** Not supported. */
    @Override
    public PathMatcher getPathMatcher(String syntaxAndPattern) {
        throw new UnsupportedOperationException();
    }

    /** Not supported. */
    @Override
    public UserPrincipalLookupService getUserPrincipalLookupService() {
        throw new UnsupportedOperationException();
    }

    /** Not supported. */
    @Override
    public WatchService newWatchService() throws IOException {
        throw new UnsupportedOperationException();
    }

    public S3Client getClient() {
        return this.client;
    }

    /**
     * Get the endpoint associated with this file system, i.e. the host of the URI it was created from.
     *
     * @see <a href="http://docs.aws.amazon.com/general/latest/gr/rande.html">http://docs.aws.amazon.com/general/latest/gr/rande.html</a>
     * @return string
     */
    public String getEndpoint() {
        return this.endpoint;
    }

    public String getBucketName() {
        return this.bucketName;
    }
}

View File

@@ -0,0 +1,857 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.AccessDeniedException;
import java.nio.file.AccessMode;
import java.nio.file.CopyOption;
import java.nio.file.DirectoryNotEmptyException;
import java.nio.file.DirectoryStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
import java.nio.file.FileSystemAlreadyExistsException;
import java.nio.file.FileSystemNotFoundException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.NoSuchFileException;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributeView;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.FileAttributeView;
import java.nio.file.attribute.FileTime;
import java.nio.file.spi.FileSystemProvider;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import software.amazon.awssdk.core.ResponseInputStream;
import software.amazon.awssdk.services.s3.model.*;
import software.amazon.awssdk.services.s3.model.S3Object;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import nextflow.cloud.aws.AwsClientFactory;
import nextflow.cloud.aws.config.AwsConfig;
import nextflow.cloud.aws.nio.util.IOUtils;
import nextflow.cloud.aws.nio.util.S3MultipartOptions;
import nextflow.cloud.aws.nio.util.S3ObjectId;
import nextflow.cloud.aws.nio.util.S3ObjectSummaryLookup;
import nextflow.extension.FilesEx;
import nextflow.file.CopyOptions;
import nextflow.file.FileHelper;
import nextflow.file.FileSystemTransferAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.collect.Sets.difference;
import static java.lang.String.format;
/**
* Spec:
*
* URI: s3://[endpoint]/{bucket}/{key} If endpoint is missing, it's assumed to
* be the default S3 endpoint (s3.amazonaws.com)
*
* FileSystem roots: /{bucket}/
*
* Treatment of S3 objects: - If a key ends in "/" it's considered a directory
* *and* a regular file. Otherwise, it's just a regular file. - It is legal for
* a key "xyz" and "xyz/" to exist at the same time. The latter is treated as a
* directory. - If a file "a/b/c" exists but there's no "a" or "a/b/", these are
* considered "implicit" directories. They can be listed, traversed and deleted.
*
* Deviations from FileSystem provider API: - Deleting a file or directory
* always succeeds, regardless of whether the file/directory existed before the
* operation was issued i.e. Files.delete() and Files.deleteIfExists() are
* equivalent.
*
*
* Future versions of this provider might allow for a strict mode that mimics
* the semantics of the FileSystem provider API on a best effort basis, at an
* increased processing cost.
*
*
*/
public class S3FileSystemProvider extends FileSystemProvider implements FileSystemTransferAware {
// Logger for this provider.
private static final Logger log = LoggerFactory.getLogger(S3FileSystemProvider.class);
// File systems registered by newFileSystem(), keyed by bucket name. Package-private because
// S3FileSystem.close() and S3FileSystem.isOpen() access it directly. It is a plain HashMap:
// newFileSystem() synchronizes on it, while getFileSystem() reads it without synchronization.
final Map<String, S3FileSystem> fileSystems = new HashMap<>();
// NOTE(review): presumably used by the attribute-lookup methods further down the class — confirm.
private final S3ObjectSummaryLookup s3ObjectSummaryLookup = new S3ObjectSummaryLookup();
/**
 * @return the URI scheme handled by this provider: {@code "s3"}
 */
@Override
public String getScheme() {
return "s3";
}
/**
 * Creates and registers the file system for the bucket named by the given URI.
 *
 * @param uri an {@code s3} URI identifying the bucket
 * @param env configuration used to build the {@link AwsConfig}
 * @return the newly created file system
 * @throws FileSystemAlreadyExistsException when a file system is already registered for that bucket
 * @throws IllegalArgumentException when the URI scheme is not {@code s3}
 */
@Override
public FileSystem newFileSystem(URI uri, Map<String, ?> env) throws IOException {
    Preconditions.checkNotNull(uri, "uri is null");
    Preconditions.checkArgument(uri.getScheme().equals("s3"), "uri scheme must be 's3': '%s'", uri);
    final String bucket = S3Path.bucketName(uri);
    // check-then-register must be atomic with respect to other creators
    synchronized (fileSystems) {
        final boolean alreadyRegistered = fileSystems.containsKey(bucket);
        if( alreadyRegistered ) {
            throw new FileSystemAlreadyExistsException("S3 filesystem already exists. Use getFileSystem() instead");
        }
        final S3FileSystem created = createFileSystem(uri, new AwsConfig(env));
        fileSystems.put(bucket, created);
        return created;
    }
}
/**
 * Looks up the file system previously registered for the bucket named by the given URI.
 *
 * @param uri a URI identifying the bucket
 * @return the registered file system
 * @throws FileSystemNotFoundException when no file system is registered for that bucket
 */
@Override
public FileSystem getFileSystem(URI uri) {
    final FileSystem registered = this.fileSystems.get(S3Path.bucketName(uri));
    if (registered != null) {
        return registered;
    }
    throw new FileSystemNotFoundException("S3 filesystem not yet created. Use newFileSystem() instead");
}
/**
 * Converts a URI into a path of the file system registered for its bucket.
 *
 * <p>Deviation from spec: throws {@link FileSystemNotFoundException} if the file system
 * hasn't been initialized yet. Call {@code newFileSystem()} first.
 * Open question kept from the original author: credentials are needed to create the file
 * system; could they be supplied afterwards instead, and how?
 *
 * @param uri a URI whose scheme matches {@link #getScheme()}
 * @return the path built from the URI path component
 * @throws IllegalArgumentException when the URI scheme does not match
 */
@Override
public Path getPath(URI uri) {
    final String expectedScheme = getScheme();
    final boolean schemeMatches = uri.getScheme().equals(expectedScheme);
    Preconditions.checkArgument(schemeMatches, "URI scheme must be %s", expectedScheme);
    final FileSystem fileSystem = getFileSystem(uri);
    return fileSystem.getPath(uri.getPath());
}
/**
 * Opens a directory stream over the entries found under the given S3 path.
 *
 * <p>Entries are produced lazily by an {@code S3Iterator} created on each call to
 * {@code iterator()}, and only those accepted by {@code filter} are returned.
 * A {@code null} filter accepts every entry.
 *
 * @param dir the directory to list; must be an {@link S3Path}
 * @param filter the filter deciding which entries are returned
 * @return a stream whose {@code close()} is a no-op
 * @throws IllegalArgumentException when {@code dir} is not an {@link S3Path}
 */
@Override
public DirectoryStream<Path> newDirectoryStream(Path dir, DirectoryStream.Filter<? super Path> filter) throws IOException {
    Preconditions.checkArgument(dir instanceof S3Path,"path must be an instance of %s", S3Path.class.getName());
    final S3Path s3Path = (S3Path) dir;
    return new DirectoryStream<Path>() {
        @Override
        public void close() throws IOException {
            // nothing to do here
        }
        @Override
        public Iterator<Path> iterator() {
            final Iterator<Path> entries = new S3Iterator(s3Path.getFileSystem(), s3Path.getBucket(), s3Path.getKey() + "/");
            if (filter == null) {
                return entries;
            }
            // honour the filter, which was previously ignored
            return new Iterator<Path>() {
                // next accepted entry, or null when it has not been looked up yet
                private Path pending;

                @Override
                public boolean hasNext() {
                    while (pending == null && entries.hasNext()) {
                        final Path candidate = entries.next();
                        try {
                            if (filter.accept(candidate)) {
                                pending = candidate;
                            }
                        } catch (IOException e) {
                            // the DirectoryStream contract requires I/O errors during iteration
                            // to be reported as DirectoryIteratorException
                            throw new java.nio.file.DirectoryIteratorException(e);
                        }
                    }
                    return pending != null;
                }

                @Override
                public Path next() {
                    if (!hasNext()) {
                        throw new java.util.NoSuchElementException();
                    }
                    final Path result = pending;
                    pending = null;
                    return result;
                }
            };
        }
    };
}
/**
 * Opens a stream on the content of an S3 object.
 *
 * @param path the object to read; must be an {@link S3Path} with a non-empty key
 * @param options must be empty, open options are not supported yet
 * @return a stream whose {@code close()} aborts the underlying response
 * @throws IllegalArgumentException when options are given, the path is not an {@link S3Path},
 *         or the path is a bucket root
 * @throws IOException when the client returns no stream for the path
 */
@Override
public InputStream newInputStream(Path path, OpenOption... options)
        throws IOException {
    Preconditions.checkArgument(options.length == 0,
            "OpenOptions not yet supported: %s",
            ImmutableList.copyOf(options)); // TODO
    Preconditions.checkArgument(path instanceof S3Path,
            "path must be an instance of %s", S3Path.class.getName());
    final S3Path s3Path = (S3Path) path;
    final boolean isRoot = s3Path.getKey().equals("");
    Preconditions.checkArgument(!isRoot,
            "cannot create InputStream for root directory: %s", FilesEx.toUriString(s3Path));

    final S3Client s3 = s3Path.getFileSystem().getClient();
    final ResponseInputStream<GetObjectResponse> response = s3.getObject(s3Path.getBucket(), s3Path.getKey());
    if (response == null) {
        throw new IOException(String.format("The specified path is a directory: %s", FilesEx.toUriString(s3Path)));
    }

    // close() must abort the HTTP connection rather than drain it. The Apache HTTP client's
    // ContentLengthInputStream.close() reads to end-of-stream to hand the connection back to
    // the pool; for a large S3 object (e.g. a multi-GB FASTQ) that can block the caller for
    // many minutes. Callers of newInputStream() typically do not consume the whole object,
    // hence abort() is the right semantics here.
    return new FilterInputStream(response) {
        @Override
        public void close() {
            response.abort();
        }
        // Just-used for testing
        void abort() {
            response.abort();
        }
    };
}
/**
 * Opens an output stream that uploads its content to the given S3 path.
 *
 * <p>When no options are given, no validation is performed. Otherwise:
 * <ul>
 *   <li>{@code APPEND} delegates to the default {@code FileSystemProvider} implementation;</li>
 *   <li>{@code READ} is rejected with {@link IllegalArgumentException};</li>
 *   <li>{@code WRITE} and {@code SPARSE} are accepted and ignored;</li>
 *   <li>any option other than {@code CREATE}, {@code CREATE_NEW}, {@code TRUNCATE_EXISTING},
 *       {@code WRITE} and {@code SPARSE} is rejected with {@link UnsupportedOperationException};</li>
 *   <li>unless both {@code CREATE} and {@code TRUNCATE_EXISTING} are present, the existence of
 *       the target is checked against the requested options.</li>
 * </ul>
 *
 * @param path the object to write; must be an {@link S3Path}
 * @param options the open options
 * @return the uploading output stream
 * @throws FileAlreadyExistsException when the target exists and {@code CREATE_NEW} was given
 *         or {@code TRUNCATE_EXISTING} was not
 * @throws NoSuchFileException when the target does not exist and neither {@code CREATE}
 *         nor {@code CREATE_NEW} was given
 */
@Override
public OutputStream newOutputStream(final Path path, final OpenOption... options) throws IOException {
Preconditions.checkArgument(path instanceof S3Path, "path must be an instance of %s", S3Path.class.getName());
S3Path s3Path = (S3Path)path;
// validate options
if (options.length > 0) {
// mutable copy: recognised options are removed below so that leftovers can be detected
Set<OpenOption> opts = new LinkedHashSet<>(Arrays.asList(options));
// cannot handle APPEND here -> use newByteChannel() implementation
if (opts.contains(StandardOpenOption.APPEND)) {
return super.newOutputStream(path, options);
}
if (opts.contains(StandardOpenOption.READ)) {
throw new IllegalArgumentException("READ not allowed");
}
// remove() reports whether the option was present
boolean create = opts.remove(StandardOpenOption.CREATE);
boolean createNew = opts.remove(StandardOpenOption.CREATE_NEW);
boolean truncateExisting = opts.remove(StandardOpenOption.TRUNCATE_EXISTING);
// remove irrelevant/ignored options
opts.remove(StandardOpenOption.WRITE);
opts.remove(StandardOpenOption.SPARSE);
// anything still in the set is an option this provider does not handle
if (!opts.isEmpty()) {
throw new UnsupportedOperationException(opts.iterator().next() + " not supported");
}
// CREATE + TRUNCATE_EXISTING is valid whether or not the target exists: skip the existence check
if (!(create && truncateExisting)) {
if (exists(s3Path)) {
if (createNew || !truncateExisting) {
throw new FileAlreadyExistsException(FilesEx.toUriString(s3Path));
}
} else {
if (!createNew && !create) {
throw new NoSuchFileException(FilesEx.toUriString(s3Path));
}
}
}
}
return createUploaderOutputStream(s3Path);
}
/**
 * Tells whether {@link #upload} can handle the given pair.
 *
 * @return {@code true} when the source lives on the default file system and the target is an {@link S3Path}
 */
@Override
public boolean canUpload(Path source, Path target) {
    final boolean sourceIsLocal = FileSystems.getDefault().equals(source.getFileSystem());
    return sourceIsLocal && target instanceof S3Path;
}
/**
 * Tells whether {@link #download} can handle the given pair.
 *
 * @return {@code true} when the source is an {@link S3Path} and the target lives on the default file system
 */
@Override
public boolean canDownload(Path source, Path target) {
    if (!(source instanceof S3Path)) {
        return false;
    }
    return FileSystems.getDefault().equals(target.getFileSystem());
}
/**
 * Downloads an S3 file or directory to the local file system.
 *
 * <p>Validation happens before anything is modified: the destination is checked first, then
 * the source attributes are read, and only then is an existing destination deleted (when
 * {@code REPLACE_EXISTING} is given). A source for which no attributes are found is reported
 * with {@link NoSuchFileException} instead of silently producing an empty local file.
 *
 * @param remoteFile the S3 path to download; must be an {@link S3Path}
 * @param localDestination the local path to create
 * @param options copy options; only {@code REPLACE_EXISTING} is consulted here
 * @throws FileAlreadyExistsException when the destination exists and {@code REPLACE_EXISTING} is not given
 * @throws NoSuchFileException when the source does not exist
 * @throws IOException when the transfer fails
 */
@Override
public void download(Path remoteFile, Path localDestination, CopyOption... options) throws IOException {
    final S3Path source = (S3Path)remoteFile;
    final CopyOptions opts = CopyOptions.parse(options);
    if (!opts.replaceExisting() && Files.exists(localDestination))
        throw new FileAlreadyExistsException(localDestination.toString());

    // Read S3 file attributes (metadata) for the source path; empty means the source does not exist
    final Optional<S3FileAttributes> attrs = readAttr1(source);
    if (!attrs.isPresent())
        throw new NoSuchFileException(FilesEx.toUriString(source));

    // delete the target only once the source is known to exist
    if (opts.replaceExisting()) {
        FileHelper.deletePath(localDestination);
    }

    final S3FileAttributes sourceAttrs = attrs.get();
    final boolean isDir = sourceAttrs.isDirectory();
    // the size is only meaningful for files; directories are reported as 0
    final long size = isDir ? 0L : sourceAttrs.size();
    final String type = isDir ? "directory": "file";
    final S3Client s3Client = source.getFileSystem().getClient();
    log.debug("S3 download {} from={} to={} size={}", type, FilesEx.toUriString(source), localDestination, size);
    if( isDir ) {
        s3Client.downloadDirectory(source, localDestination.toFile());
    }
    else if( size > 0 ) {
        s3Client.downloadFile(source, localDestination.toFile(), size);
    }
    else {
        // zero-length object: create the empty file locally, no transfer needed
        Files.deleteIfExists(localDestination);
        Files.createFile(localDestination);
    }
}
/**
 * Uploads a local file or directory to S3.
 *
 * @param localFile the local path to upload; symbolic links are rejected
 * @param remoteDestination the destination; must be an {@link S3Path}
 * @param options copy options; {@code REPLACE_EXISTING} and the follow-links setting are consulted
 * @throws FileAlreadyExistsException when the destination exists and {@code REPLACE_EXISTING} is not given
 * @throws IOException when the source is a symbolic link or the transfer fails
 */
@Override
public void upload(Path localFile, Path remoteDestination, CopyOption... options) throws IOException {
    final S3Path target = (S3Path) remoteDestination;
    final CopyOptions opts = CopyOptions.parse(options);
    final LinkOption[] linkOptions;
    if (opts.followLinks()) {
        linkOptions = new LinkOption[0];
    } else {
        linkOptions = new LinkOption[] { LinkOption.NOFOLLOW_LINKS };
    }
    // attributes of source file
    final BasicFileAttributes localAttrs = Files.readAttributes(localFile, BasicFileAttributes.class, linkOptions);
    if (localAttrs.isSymbolicLink()) {
        throw new IOException("Uploading of symbolic links not supported - offending path: " + localFile);
    }
    final boolean targetExists = readAttr1(target).isPresent();
    // delete target if it exists and REPLACE_EXISTING is specified
    if (opts.replaceExisting()) {
        FileHelper.deletePath(target);
    } else if (targetExists) {
        throw new FileAlreadyExistsException(target.toString());
    }
    final boolean isDir = Files.isDirectory(localFile);
    log.debug("S3 upload {} from={} to={}", isDir ? "directory" : "file", localFile, FilesEx.toUriString(target));
    final S3Client s3Client = target.getFileSystem().getClient();
    if( !isDir ) {
        s3Client.uploadFile(localFile.toFile(), target);
        return;
    }
    s3Client.uploadDirectory(localFile.toFile(), target);
}
/**
 * Creates the output stream used to upload content to the given S3 path.
 *
 * <p>The storage class of the path takes precedence over the {@code upload_storage_class}
 * property. File system properties may be {@code null}: in that case default multipart
 * options are used and no property-based storage class, encryption or KMS key is applied.
 *
 * @param fileToUpload the destination S3 path
 * @return the configured output stream
 */
private S3OutputStream createUploaderOutputStream( S3Path fileToUpload ) {
    final S3Client s3 = fileToUpload.getFileSystem().getClient();
    final Properties props = fileToUpload.getFileSystem().properties();
    // the properties were null-checked for the multipart options only; guard every access
    final boolean hasProps = props != null;
    final String defaultStorageClass = hasProps ? props.getProperty("upload_storage_class") : null;
    final String storageEncryption = hasProps ? props.getProperty("storage_encryption") : null;
    final String kmsKeyId = hasProps ? props.getProperty("storage_kms_key_id") : null;

    final String storageClass = fileToUpload.getStorageClass()!=null ? fileToUpload.getStorageClass() : defaultStorageClass;
    final S3MultipartOptions opts = hasProps ? new S3MultipartOptions(props) : new S3MultipartOptions();
    final S3ObjectId objectId = fileToUpload.toS3ObjectId();
    return new S3OutputStream(s3.getClient(), objectId, opts)
            .setCannedAcl(s3.getCannedAcl())
            .setStorageClass(storageClass)
            .setStorageEncryption(storageEncryption)
            .setKmsKeyId(kmsKeyId)
            .setContentType(fileToUpload.getContentType())
            .setTags(fileToUpload.getTagsList());
}
/**
 * Opens a seekable channel on an S3 object by staging its content in a local temporary file.
 *
 * <p>The object is first downloaded to a temporary file; the returned channel operates on
 * that file. On {@code close()}, the temporary file is uploaded back to S3 when it still
 * exists, otherwise the S3 object is deleted; the temporary file and its directory are then
 * removed.
 *
 * <p>The download is streamed straight to disk and the S3 response stream is always closed,
 * so the object is never held in memory as a whole.
 *
 * @param path the object to open; must be an {@link S3Path}
 * @param options the open options, applied to the temporary file
 * @param attrs not used by this implementation
 * @return a channel backed by the temporary copy
 * @throws NoSuchFileException when the object does not exist and neither {@code CREATE}
 *         nor {@code CREATE_NEW} is given
 * @throws IOException when the path is a directory or the download fails
 */
@Override
public SeekableByteChannel newByteChannel(Path path,
        Set<? extends OpenOption> options, FileAttribute<?>... attrs)
        throws IOException {
    Preconditions.checkArgument(path instanceof S3Path,
            "path must be an instance of %s", S3Path.class.getName());
    final S3Path s3Path = (S3Path) path;
    // we resolve to a file inside the temp folder with the s3path name
    final Path tempFile = createTempDir().resolve(path.getFileName().toString());
    // try-with-resources releases the HTTP response even when the copy fails
    try (InputStream is = s3Path.getFileSystem().getClient()
            .getObject(s3Path.getBucket(), s3Path.getKey())) {
        if (is == null)
            throw new IOException(String.format("The specified path is a directory: %s", path));
        // stream to disk instead of buffering the whole object in a byte[]
        Files.copy(is, tempFile, StandardCopyOption.REPLACE_EXISTING);
    }
    catch (NoSuchFileException e) {
        // When opening for CREATE/CREATE_NEW the remote object is allowed to not exist yet
        // — the temp file will be created and uploaded on close. For any other open mode
        // propagate the original exception so the caller sees the real s3:// path.
        if (!options.contains(StandardOpenOption.CREATE) && !options.contains(StandardOpenOption.CREATE_NEW)) {
            throw e;
        }
        log.trace("S3 object does not exist yet, will be created on close: {}", FilesEx.toUriString(s3Path));
    }
    // and we can use the File SeekableByteChannel implementation
    final SeekableByteChannel seekable = Files.newByteChannel(tempFile, options);
    final List<Tag> tags = s3Path.getTagsList();
    final String contentType = s3Path.getContentType();
    return new SeekableByteChannel() {
        @Override
        public boolean isOpen() {
            return seekable.isOpen();
        }
        @Override
        public void close() throws IOException {
            // closing twice must not upload twice
            if (!seekable.isOpen()) {
                return;
            }
            seekable.close();
            // upload the content where the seekable ends (close)
            // NOTE(review): when the upload fails, the temporary file and directory are left behind.
            if (Files.exists(tempFile)) {
                try (InputStream stream = Files.newInputStream(tempFile)) {
                    /*
                     FIXME: if the stream is {@link InputStream#markSupported()} i can reuse the same stream
                     and evict the close and open methods of probeContentType. By this way:
                     metadata.setContentType(new Tika().detect(stream, tempFile.getFileName().toString()));
                    */
                    s3Path.getFileSystem()
                            .getClient()
                            .putObject(s3Path.getBucket(), s3Path.getKey(), stream, tags, contentType, Files.size(tempFile));
                }
            }
            else {
                // delete: check option delete_on_close
                s3Path.getFileSystem().
                        getClient().deleteObject(s3Path.getBucket(), s3Path.getKey());
            }
            // and delete the temp dir
            Files.deleteIfExists(tempFile);
            Files.deleteIfExists(tempFile.getParent());
        }
        @Override
        public int write(ByteBuffer src) throws IOException {
            return seekable.write(src);
        }
        @Override
        public SeekableByteChannel truncate(long size) throws IOException {
            return seekable.truncate(size);
        }
        @Override
        public long size() throws IOException {
            return seekable.size();
        }
        @Override
        public int read(ByteBuffer dst) throws IOException {
            return seekable.read(dst);
        }
        @Override
        public SeekableByteChannel position(long newPosition)
                throws IOException {
            return seekable.position(newPosition);
        }
        @Override
        public long position() throws IOException {
            return seekable.position();
        }
    };
}
/**
 * Creates a directory by writing an empty object whose key ends with {@code "/"}.
 *
 * <p>Deviations from spec: no atomic check-and-create is performed; the marker object is
 * written unconditionally, whether or not it already existed.
 *
 * @param dir the directory to create; must be an {@link S3Path} with a non-empty key
 * @param attrs must be empty, file attributes are not supported yet
 * @throws UnsupportedOperationException when the path denotes a bucket
 * @throws IllegalArgumentException when attributes are given
 */
@Override
public void createDirectory(Path dir, FileAttribute<?>... attrs)
        throws IOException {
    // FIXME: throw exception if the same key already exists at amazon s3
    final S3Path s3Path = (S3Path) dir;
    Preconditions.checkArgument(attrs.length == 0,
            "attrs not yet supported: %s", ImmutableList.copyOf(attrs)); // TODO
    final String key = s3Path.getKey();
    // Creating a bucket is not supported
    if (key.isEmpty()) {
        throw new UnsupportedOperationException("Creating a bucket is not supported");
    }
    final List<Tag> tags = s3Path.getTagsList();
    // directory markers always carry a trailing slash
    final String markerKey = key.endsWith("/") ? key : key + "/";
    s3Path.getFileSystem()
            .getClient()
            .putObject(s3Path.getBucket(), markerKey, new ByteArrayInputStream(new byte[0]), tags, null, 0);
}
/**
 * Deletes the object designated by the given path.
 *
 * NOTE: S3 directories are virtual (marker objects or implied key prefixes),
 * so emptiness is not checked before deleting. Enforcing POSIX-like
 * DirectoryNotEmptyException semantics on S3 is unreliable due to eventual
 * consistency and unnecessary because deleting a directory marker does not
 * affect its children.
 *
 * @param path the path to delete; must be an {@link S3Path}
 * @throws IllegalArgumentException when the path is not an {@link S3Path}
 * @throws NoSuchFileException when {@code Files.notExists} reports the path as missing
 * @throws UnsupportedOperationException when the path has an empty key (i.e. a bucket)
 * @throws IOException declared by the provider contract
 */
@Override
public void delete(Path path) throws IOException {
    Preconditions.checkArgument(path instanceof S3Path,
            "path must be an instance of %s", S3Path.class.getName());
    final S3Path target = (S3Path) path;

    if (Files.notExists(path)) {
        throw new NoSuchFileException("the path: " + FilesEx.toUriString(target) + " does not exist");
    }
    if (target.getKey().isEmpty()) {
        // an empty key designates the bucket itself
        throw new UnsupportedOperationException("Deleting a bucket is not supported");
    }

    // both key flavours are removed: the plain key and the '/' terminated
    // directory marker (sometimes the latter exists and sometimes not)
    final String[] suffixes = { "", "/" };
    for (String suffix : suffixes) {
        target.getFileSystem().getClient()
                .deleteObject(target.getBucket(), target.getKey() + suffix);
    }
}
/**
 * Copies an S3 object to another S3 location with a copy-object request.
 *
 * The call is a no-op when both paths designate the same file. Only
 * {@link StandardCopyOption#REPLACE_EXISTING} is accepted; without it an
 * existing target is rejected. Tags, content type and storage class are
 * taken from the target path. Directory paths are not rejected here (a
 * former precondition doing so is disabled).
 *
 * @param source the object to copy; must be an {@link S3Path}
 * @param target the destination; must be an {@link S3Path}
 * @param options copy options, at most {@code REPLACE_EXISTING}
 * @throws IllegalArgumentException when a path is not an {@link S3Path} or an option is unsupported
 * @throws FileAlreadyExistsException when the target exists and replacing was not requested
 * @throws IOException when the copy fails
 */
@Override
public void copy(Path source, Path target, CopyOption... options) throws IOException {
    Preconditions.checkArgument(source instanceof S3Path,
            "source must be an instance of %s", S3Path.class.getName());
    Preconditions.checkArgument(target instanceof S3Path,
            "target must be an instance of %s", S3Path.class.getName());

    if (isSameFile(source, target)) {
        return;
    }

    final S3Path from = (S3Path) source;
    final S3Path to = (S3Path) target;

    final ImmutableSet<CopyOption> requested = ImmutableSet.copyOf(options);
    verifySupportedOptions(EnumSet.of(StandardCopyOption.REPLACE_EXISTING), requested);

    final boolean replaceExisting = requested.contains(StandardCopyOption.REPLACE_EXISTING);
    if (!replaceExisting && exists(to)) {
        throw new FileAlreadyExistsException(format(
                "target already exists: %s", FilesEx.toUriString(to)));
    }

    final S3Client client = from.getFileSystem().getClient();
    // object settings come from the destination path
    final List<Tag> tags = to.getTagsList();
    final String contentType = to.getContentType();
    final String storageClass = to.getStorageClass();

    // TransferManager alternative
    final CopyObjectRequest.Builder copyRequest = CopyObjectRequest.builder()
            .sourceBucket(from.getBucket())
            .sourceKey(from.getKey())
            .destinationBucket(to.getBucket())
            .destinationKey(to.getKey());
    log.trace("Copy file via copy object - source: source={}, target={}, tags={}, storageClass={}", from, to, tags, storageClass);
    client.copyFile(copyRequest, tags, contentType, storageClass);
}
/**
 * Moves an object by copying it to the target and then deleting the source.
 *
 * The operation is not atomic: a failure between the two steps leaves both
 * objects in place. Moving a path onto itself has no effect.
 *
 * @param source the object to move
 * @param target the destination
 * @param options copy options forwarded to {@link #copy(Path, Path, CopyOption...)}
 * @throws IllegalArgumentException when {@link StandardCopyOption#ATOMIC_MOVE} is requested
 * @throws IOException when the copy or the delete fails
 */
@Override
public void move(Path source, Path target, CopyOption... options) throws IOException {
    for (CopyOption it : options) {
        if (it == StandardCopyOption.ATOMIC_MOVE) {
            throw new IllegalArgumentException("Atomic move not supported by S3 file system provider");
        }
    }
    // copy() validates its arguments and returns early when both paths are the same file
    copy(source, target, options);
    // in that case nothing was copied: deleting the source would destroy the only copy
    if (isSameFile(source, target)) {
        return;
    }
    delete(source);
}
/**
 * Tells whether two paths designate the same file. Only absolute paths are
 * compared, and they match when {@code equals} says so.
 *
 * @param path1 first path
 * @param path2 second path
 * @return {@code true} when both paths are absolute and equal
 * @throws IOException declared by the provider contract
 */
@Override
public boolean isSameFile(Path path1, Path path2) throws IOException {
    if (!path1.isAbsolute() || !path2.isAbsolute()) {
        return false;
    }
    return path1.equals(path2);
}
/**
 * Tells whether the given path is hidden. This implementation never
 * reports a path as hidden.
 *
 * @param path the path to test (not inspected)
 * @return always {@code false}
 */
@Override
public boolean isHidden(Path path) throws IOException {
return false;
}
/**
 * File stores are not supported by this provider.
 *
 * @param path ignored
 * @throws UnsupportedOperationException always
 */
@Override
public FileStore getFileStore(Path path) throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Checks that the given path exists and, optionally, that it can be accessed
 * with the requested modes.
 *
 * With no modes only an object summary lookup is performed. Otherwise the
 * access control list of the path is fetched and each mode is verified
 * against the grants of the caller account.
 *
 * @param path the path to check; cast to {@link S3Path}, must be absolute
 * @param modes the access modes to verify, may be {@code null} or empty
 * @throws IllegalArgumentException when the path is not absolute
 * @throws AccessDeniedException when a requested mode is not granted; always for EXECUTE
 * @throws IOException when the lookup or the ACL retrieval fails
 */
@Override
public void checkAccess(Path path, AccessMode... modes) throws IOException {
    final S3Path target = (S3Path) path;
    Preconditions.checkArgument(target.isAbsolute(),
            "path must be absolute: %s", target);
    final S3Client client = target.getFileSystem().getClient();

    final boolean existenceOnly = modes == null || modes.length == 0;
    if (existenceOnly) {
        // when no modes are given, the method is invoked
        // by `Files.exists` method, therefore just use summary lookup
        s3ObjectSummaryLookup.lookup(target);
        return;
    }

    // fetching the ACL also checks, as a side-effect, that the file exists
    final AccessControlPolicy acl = getAccessControl(target);
    final String caller = client.getCallerAccount();

    for (AccessMode mode : modes) {
        switch (mode) {
            case EXECUTE:
                throw new AccessDeniedException(target.toString(), null,
                        "file is not executable");
            case READ:
                if (caller != null) {
                    if (!hasPermissions(acl, caller,
                            EnumSet.of(Permission.FULL_CONTROL, Permission.READ))) {
                        throw new AccessDeniedException(target.toString(), null,
                                "file is not readable");
                    }
                }
                else {
                    // if we cannot get the user's canonical ID, try read the object
                    s3ObjectSummaryLookup.lookup(target);
                }
                break;
            case WRITE:
                if (caller != null) {
                    if (!hasPermissions(acl, caller,
                            EnumSet.of(Permission.FULL_CONTROL, Permission.WRITE))) {
                        throw new AccessDeniedException(target.toString(), null,
                                format("bucket '%s' is not writable",
                                        target.getBucket()));
                    }
                }
                else {
                    log.warn("User's Canonical Id cannot be retrieved. We can not check the access.");
                }
                break;
        }
    }
}
/**
 * Checks whether the access control policy contains a grant for the given
 * owner carrying at least one of the given permissions.
 *
 * Grants whose grantee is missing or has no id (e.g. grants that are not
 * addressed to a specific account) can never match an owner id and are
 * skipped instead of failing with a {@code NullPointerException}.
 *
 * @param acl the access control policy to inspect
 * @param owner the id the grantee must have
 * @param permissions the accepted permissions, at least one must be granted
 * @return {@code true} when a matching grant is found, {@code false} otherwise
 */
private boolean hasPermissions(AccessControlPolicy acl, String owner,
        EnumSet<Permission> permissions) {
    for (Grant grant : acl.grants()) {
        if (grant.grantee() == null) {
            continue;
        }
        final String granteeId = grant.grantee().id();
        // the id may be absent, so it is null-checked before comparing
        if (granteeId != null
                && granteeId.equals(owner)
                && permissions.contains(grant.permission())) {
            return true;
        }
    }
    return false;
}
/**
 * Returns an attribute view for the given path. Only view types satisfying
 * the {@link BasicFileAttributeView} check below are served; the attributes
 * are read eagerly when the view is created.
 *
 * @param path the path to inspect; must be an {@link S3Path}
 * @param type the requested view type
 * @param options link options (not used)
 * @return the view, or {@code null} when the view type is not supported
 * @throws IllegalArgumentException when the path is not an {@link S3Path}
 * @throws RuntimeException when the attributes cannot be read
 */
@Override
public <V extends FileAttributeView> V getFileAttributeView(Path path, Class<V> type, LinkOption... options) {
    Preconditions.checkArgument(path instanceof S3Path,
            "path must be an instance of %s", S3Path.class.getName());
    final S3Path target = (S3Path) path;

    if (!type.isAssignableFrom(BasicFileAttributeView.class)) {
        log.trace("Unsupported S3 file system provider file attribute view: " + type.getName());
        return null;
    }

    try {
        return (V) new S3FileAttributesView(readAttr0(target));
    }
    catch (IOException e) {
        throw new RuntimeException("Unable read attributes for file: " + FilesEx.toUriString(target), e);
    }
}
/**
 * Reads the basic attributes of the given path. A path with an empty key
 * (the bucket root) is reported as a directory without any lookup.
 *
 * @param path the path to inspect; must be an {@link S3Path}
 * @param type the requested attributes type
 * @param options link options (not used)
 * @return the attributes of the path
 * @throws IllegalArgumentException when the path is not an {@link S3Path}
 * @throws UnsupportedOperationException when the attributes type is not supported
 * @throws IOException when the attributes cannot be read
 */
@Override
public <A extends BasicFileAttributes> A readAttributes(Path path, Class<A> type, LinkOption... options) throws IOException {
    Preconditions.checkArgument(path instanceof S3Path,
            "path must be an instance of %s", S3Path.class.getName());
    final S3Path target = (S3Path) path;

    if (!type.isAssignableFrom(BasicFileAttributes.class)) {
        // not support attribute class
        throw new UnsupportedOperationException(format("only %s supported", BasicFileAttributes.class));
    }

    final S3FileAttributes attributes;
    if ("".equals(target.getKey())) {
        // the root bucket is implicitly a directory
        attributes = new S3FileAttributes("/", null, 0, true, false);
    }
    else {
        // read the target path attributes
        attributes = readAttr0(target);
    }
    return (A) attributes;
}
/**
 * Variant of {@code readAttr0} that maps a missing file to an empty result
 * instead of an exception.
 *
 * @param s3Path the path to inspect
 * @return the attributes, or an empty {@link Optional} when the file does not exist
 * @throws IOException for any failure other than {@link NoSuchFileException}
 */
private Optional<S3FileAttributes> readAttr1(S3Path s3Path) throws IOException {
    try {
        final S3FileAttributes attributes = readAttr0(s3Path);
        return Optional.of(attributes);
    }
    catch (NoSuchFileException missing) {
        return Optional.empty();
    }
}
/**
 * Looks up the object summary for the given path and converts it into
 * file attributes, classifying the path as a directory or a regular file:
 * <ul>
 * <li>the summary key is the path key plus '/': a directory whose marker object exists;</li>
 * <li>the summary key merely starts with the path key (or the path key is empty):
 *     a directory implied by one of its children, reported with size 0;</li>
 * <li>otherwise: a regular file.</li>
 * </ul>
 *
 * @param s3Path the path to inspect
 * @return the attributes of the path
 * @throws IOException when the lookup fails
 */
private S3FileAttributes readAttr0(S3Path s3Path) throws IOException {
    final S3Object summary = s3ObjectSummaryLookup.lookup(s3Path);

    // the modification time is optional in the summary
    final FileTime modified = summary.lastModified() != null
            ? FileTime.from(summary.lastModified().toEpochMilli(), TimeUnit.MILLISECONDS)
            : null;
    final long size = summary.size();
    final String foundKey = summary.key();
    final String requestedKey = s3Path.getKey();

    // directory whose marker key exists in amazon s3
    if (foundKey.equals(requestedKey + "/")) {
        return new S3FileAttributes(foundKey, modified, size, true, false);
    }

    // directory that does not exist as an object: the summary belongs to a child
    final boolean differentOrRoot = !foundKey.equals(requestedKey) || "".equals(requestedKey);
    if (differentOrRoot && foundKey.startsWith(requestedKey)) {
        // no metadata, we fake one and drop the extra part of the key
        return new S3FileAttributes(requestedKey + "/", modified, 0L, true, false);
    }

    // plain file
    return new S3FileAttributes(foundKey, modified, size, false, true);
}
/**
 * Reading attributes by name is not supported by this provider.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Map<String, Object> readAttributes(Path path, String attributes, LinkOption... options) throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Setting attributes is not supported by this provider.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void setAttribute(Path path, String attribute, Object value,
LinkOption... options) throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Builds the file system for the given URI: loads the settings, creates the
 * S3 client and configures it from those settings.
 *
 * @param uri the URI the file system is created for; the bucket name is derived from it
 * @param awsConfig the AWS configuration supplying legacy properties, S3 settings and region
 * @return a new file system bound to this provider
 */
protected S3FileSystem createFileSystem(URI uri, AwsConfig awsConfig) {
    // settings from the classpath first, then the legacy
    // properties of the AWS config on top of them
    final Properties settings = loadAmazonProperties();
    settings.putAll(awsConfig.getS3LegacyProperties());

    final String bucketName = S3Path.bucketName(uri);
    // do not use `global` flag for custom endpoint because
    // when enabling that flag, it overrides S3 endpoints with AWS global endpoint
    // see https://github.com/nextflow-io/nextflow/pull/5779
    final boolean global = !(bucketName == null || awsConfig.getS3Config().isCustomEndpoint());

    final AwsClientFactory factory = new AwsClientFactory(awsConfig, awsConfig.resolveS3Region());
    final S3Client client = new S3Client(factory, settings, global);

    // client options taken from the settings
    client.setCannedAcl(getProp(settings, "s_3_acl", "s3_acl", "s3acl", "s3Acl"));
    client.setStorageEncryption(settings.getProperty("storage_encryption"));
    client.setKmsKeyId(settings.getProperty("storage_kms_key_id"));
    client.setRequesterPaysEnabled(settings.getProperty("requester_pays"));

    final boolean glacierOptionGiven = settings.getProperty("glacier_auto_retrieval") != null;
    if (glacierOptionGiven) {
        log.warn("Glacier auto-retrieval is no longer supported, config option `aws.client.glacierAutoRetrieval` will be ignored");
    }

    return new S3FileSystem(this, client, uri, settings);
}
/**
 * Returns the value of the first of the given keys that is present in the
 * properties.
 *
 * @param props the properties to search
 * @param keys the candidate keys, in order of preference
 * @return the value of the first key found, or {@code null} when none is present
 */
protected String getProp(Properties props, String... keys) {
    for (int i = 0; i < keys.length; i++) {
        final String candidate = keys[i];
        if (!props.containsKey(candidate)) {
            continue;
        }
        return props.getProperty(candidate);
    }
    return null;
}
/**
 * Loads the optional {@code amazon.properties} resource through the context
 * class loader of the current thread.
 *
 * Loading is best-effort: a missing resource yields empty properties and a
 * read failure is logged, not propagated, returning whatever was loaded up
 * to that point.
 *
 * @return the loaded properties, never {@code null}
 */
protected Properties loadAmazonProperties() {
    Properties props = new Properties();
    // http://www.javaworld.com/javaworld/javaqa/2003-06/01-qa-0606-load.html
    // http://www.javaworld.com/javaqa/2003-08/01-qa-0808-property.html
    try (InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream("amazon.properties")) {
        if (in != null) {
            props.load(in);
        }
    }
    catch (IOException e) {
        // the file is optional, so do not fail: but leave a trace of the problem
        log.warn("Unable to read `amazon.properties` from the classpath", e);
    }
    return props;
}
// ~~~
/**
 * Rejects any option that is not part of the allowed set.
 *
 * @param allowedOptions the options this operation supports
 * @param actualOptions the options requested by the caller
 * @throws IllegalArgumentException when at least one requested option is not allowed
 */
private <T> void verifySupportedOptions(Set<? extends T> allowedOptions,
        Set<? extends T> actualOptions) {
    final Set<? extends T> rejected = difference(actualOptions, allowedOptions);
    final boolean allSupported = rejected.isEmpty();
    Preconditions.checkArgument(allSupported,
            "the following options are not supported: %s", rejected);
}
/**
 * Checks whether the given path exists by looking up its object summary.
 * Any {@link IOException} raised by the lookup is reported as "not existing".
 *
 * @param path the path to check
 * @return {@code true} when the lookup succeeds, {@code false} when it fails
 */
private boolean exists(S3Path path) {
    boolean found;
    try {
        s3ObjectSummaryLookup.lookup(path);
        found = true;
    }
    catch (IOException e) {
        found = false;
    }
    return found;
}
/**
 * Retrieves the access control policy for the given path: the bucket ACL
 * when the path has no key, the object ACL of the key otherwise.
 *
 * @param path {@link S3Path} to get the access control for
 * @return the access control policy of the bucket or of the object
 * @throws IOException if error getting access control
 */
private AccessControlPolicy getAccessControl(S3Path path) throws IOException {
    final String key = path.getKey();
    final boolean bucketLevel = key == null || key.isEmpty();
    if (!bucketLevel) {
        return path.getFileSystem().getClient().getObjectAcl(path.getBucket(), key);
    }
    return path.getFileSystem().getClient().getBucketAcl(path.getBucket());
}
/**
 * Creates a new temporary directory, named with the {@code temp-s3-} prefix,
 * in the default temporary-file location.
 *
 * @return the path of the newly created temporary directory
 * @throws IOException when the directory cannot be created
 */
protected Path createTempDir() throws IOException {
return Files.createTempDirectory("temp-s3-");
}
}

View File

@@ -0,0 +1,157 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import software.amazon.awssdk.services.s3.model.*;
import com.google.common.base.Preconditions;
/**
* S3 iterator over folders at first level.
* Future versions of this class should be return the elements
* in a incremental way when the #next() method is called.
*/
public class S3Iterator implements Iterator<Path> {
private S3FileSystem s3FileSystem;
private String bucket;
private String key;
private Iterator<S3Path> it;
public S3Iterator(S3FileSystem s3FileSystem, String bucket, String key) {
Preconditions.checkArgument(key != null && key.endsWith("/"), "key %s should be ended with slash '/'", key);
this.bucket = bucket;
// the only case i dont need the end slash is to list buckets content
this.key = key.length() == 1 ? "" : key;
this.s3FileSystem = s3FileSystem;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public S3Path next() {
return getIterator().next();
}
@Override
public boolean hasNext() {
return getIterator().hasNext();
}
private Iterator<S3Path> getIterator() {
if (it == null) {
ListObjectsV2Request request = buildRequest();
S3Client s3Client = s3FileSystem.getClient();
// This automatically handles pagination
try {
it = s3Client.listObjectsV2Paginator(request).stream().flatMap(r -> parseObjectListing(r).stream()).iterator();
}catch( IOException e ){
throw new UncheckedIOException(e);
}
}
return it;
}
private ListObjectsV2Request buildRequest(){
return ListObjectsV2Request.builder()
.bucket(bucket)
.prefix(key)
.delimiter("/")
.build();
}
/**
* add to the listPath the elements at the same level that s3Path
* @param current ListObjectsResponseto walk
*/
private List<S3Path> parseObjectListing( ListObjectsV2Response current) {
List<S3Path> listPath = new ArrayList<>();
// add all the objects i.e. the files, except iterator key.
// In V2, object listing is also returning the key of the request. Skip it from the iterator to avoid loops.
for (final S3Object objectSummary : current.contents()) {
final String key = objectSummary.key();
if( this.key.equals(key)) continue;
final S3Path path = new S3Path(s3FileSystem, "/" + bucket, key.split("/"));
path.setObjectSummary(objectSummary);
listPath.add(path);
}
// add all the common prefixes i.e. the directories, except iterator key
for(final CommonPrefix prefix : current.commonPrefixes()) {
if( prefix.prefix().equals("/") || this.key.equals(prefix.prefix())) continue;
listPath.add(new S3Path(s3FileSystem, "/" + bucket, prefix.prefix()));
}
return listPath;
}
/**
* The current #buildRequest() get all subdirectories and her content.
* This method filter the keyChild and check if is a immediate
* descendant of the keyParent parameter
* @param keyParent String
* @param keyChild String
* @return String parsed
* or null when the keyChild and keyParent are the same and not have to be returned
*/
@Deprecated
private String getInmediateDescendent(String keyParent, String keyChild){
keyParent = deleteExtraPath(keyParent);
keyChild = deleteExtraPath(keyChild);
final int parentLen = keyParent.length();
final String childWithoutParent = deleteExtraPath(keyChild
.substring(parentLen));
String[] parts = childWithoutParent.split("/");
if (parts.length > 0 && !parts[0].isEmpty()){
return keyParent + "/" + parts[0];
}
else {
return null;
}
}
@Deprecated
private String deleteExtraPath(String keyChild) {
if (keyChild.startsWith("/")){
keyChild = keyChild.substring(1);
}
if (keyChild.endsWith("/")){
keyChild = keyChild.substring(0, keyChild.length() - 1);
}
return keyChild;
}
}

View File

@@ -0,0 +1,662 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.Comparator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicInteger;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.*;
import nextflow.cloud.aws.nio.util.ByteBufferInputStream;
import nextflow.cloud.aws.nio.util.S3MultipartOptions;
import nextflow.cloud.aws.nio.util.S3ObjectId;
import nextflow.util.ThreadPoolManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull;
/**
* Parallel S3 multipart uploader. Based on the following code request
* See https://github.com/Upplication/Amazon-S3-FileSystem-NIO2/pulls
*
* @author Paolo Di Tommaso
* @author Tom Wieczorek
*/
public final class S3OutputStream extends OutputStream {
private static final Logger log = LoggerFactory.getLogger(S3OutputStream.class);
/**
* Minimum multipart chunk size 5MB
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
*/
private static final int MIN_MULTIPART_UPLOAD = 5 * 1024 * 1024;
/**
* Amazon S3 API implementation to use.
*/
private final S3Client s3;
/**
* ID of the S3 object to store data into.
*/
private final S3ObjectId objectId;
/**
* Amazon S3 storage class to apply to the newly created S3 object, if any.
*/
private StorageClass storageClass;
private ServerSideEncryption storageEncryption;
private String kmsKeyId;
private String contentType;
/**
* Indicates if the stream has been closed.
*/
private volatile boolean closed;
/**
* Indicates if the upload has been aborted
*/
private volatile boolean aborted;
/**
* If a multipart upload is in progress, holds the ID for it, {@code null} otherwise.
*/
private volatile String uploadId;
/**
* If a multipart upload is in progress, holds the ETags of the uploaded parts, {@code null} otherwise.
*/
private Queue<CompletedPart> completedParts;
/**
* Holds upload request metadata
*/
private final S3MultipartOptions request;
/**
* Pool of reusable buffers: instead of allocating a new buffer for each chunk,
* a buffer is put back into this queue once its upload attempt has finished
*/
final private Queue<ByteBuffer> bufferPool = new ConcurrentLinkedQueue<ByteBuffer>();
/**
* The executor service (thread pool) which manages the upload in background
*/
private ExecutorService executor;
/**
* The current working buffer
*/
private ByteBuffer buf;
private MessageDigest md5;
/**
* Phaser object to synchronize stream termination
*/
private Phaser phaser;
/**
* Count the number of uploaded chunks
*/
private int partsCount;
private int bufferSize;
private ObjectCannedACL cannedAcl;
private List<Tag> tags;
private final AtomicInteger bufferCounter = new AtomicInteger();
/**
 * Creates a new {@code S3OutputStream} that writes data into the S3 object with the given {@code objectId}.
 * No object metadata or storage class is set by the constructor itself.
 *
 * @param s3 the S3 client used to perform the requests; must not be {@code null}
 * @param objectId the identifier of the target object; must not be {@code null}
 * @param request the upload options; its buffer size is read here, so it must not be {@code null}
 */
public S3OutputStream(final S3Client s3, S3ObjectId objectId, S3MultipartOptions request) {
this.s3 = requireNonNull(s3);
this.objectId = requireNonNull(objectId);
this.request = request;
// upper bound for the capacity of the upload buffers
this.bufferSize = request.getBufferSize();
}
/**
 * Copies the content of the given buffer into a new buffer whose capacity
 * is 2.5 times larger, capped to the configured buffer size.
 *
 * @param byteBuffer the buffer to grow; it is flipped for reading by this call
 * @return a new buffer with the same byte order holding the same content
 */
private ByteBuffer expandBuffer(ByteBuffer byteBuffer) {
    final int grown = (int) (byteBuffer.capacity() * 2.5f);
    final int capacity = grown <= bufferSize ? grown : bufferSize;

    // cast to prevent Java 8 / Java 11 cross compile-runtime error
    // https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
    ((java.nio.Buffer) byteBuffer).flip();

    final ByteBuffer larger = ByteBuffer.allocate(capacity);
    larger.order(byteBuffer.order());
    larger.put(byteBuffer);
    return larger;
}
/**
 * Sets the canned ACL added to the upload requests when not {@code null}.
 *
 * @param acl the canned ACL, may be {@code null}
 * @return this stream, to allow call chaining
 */
public S3OutputStream setCannedAcl(ObjectCannedACL acl) {
this.cannedAcl = acl;
return this;
}
/**
 * Sets the tags for the uploaded object. In this class the tags are attached
 * by the single-request put path only; the multipart initiation request does
 * not set them.
 *
 * @param tags the tags, may be {@code null} or empty
 * @return this stream, to allow call chaining
 */
public S3OutputStream setTags(List<Tag> tags) {
this.tags = tags;
return this;
}
/**
 * Sets the storage class from its string value; a {@code null} value leaves
 * the current setting untouched.
 *
 * @param storageClass the storage class name, may be {@code null}
 * @return this stream, to allow call chaining
 */
public S3OutputStream setStorageClass(String storageClass) {
    if (storageClass == null) {
        return this;
    }
    this.storageClass = StorageClass.fromValue(storageClass);
    return this;
}
/**
 * Sets the server side encryption from its string value; a {@code null}
 * value leaves the current setting untouched.
 *
 * @param storageEncryption the encryption name, may be {@code null}
 * @return this stream, to allow call chaining
 */
public S3OutputStream setStorageEncryption(String storageEncryption) {
    if (storageEncryption == null) {
        return this;
    }
    this.storageEncryption = ServerSideEncryption.fromValue(storageEncryption);
    return this;
}
/**
 * Sets the KMS key id added to the upload requests when not {@code null}.
 *
 * @param kmsKeyId the key id, may be {@code null}
 * @return this stream, to allow call chaining
 */
public S3OutputStream setKmsKeyId(String kmsKeyId) {
this.kmsKeyId = kmsKeyId;
return this;
}
/**
 * Sets the content type added to the upload requests when not {@code null}.
 *
 * @param type the content type, may be {@code null}
 * @return this stream, to allow call chaining
 */
public S3OutputStream setContentType(String type) {
this.contentType = type;
return this;
}
/**
 * Creates a fresh MD5 digester.
 *
 * @return A MD5 message digester
 * @throws IllegalStateException when no MD5 provider is available
 */
private MessageDigest createMd5() {
    final MessageDigest digest;
    try {
        digest = MessageDigest.getInstance("MD5");
    }
    catch (NoSuchAlgorithmException e) {
        throw new IllegalStateException("Cannot find a MD5 algorithm provider",e);
    }
    return digest;
}
/**
 * Writes a byte into the current buffer, updating the running MD5 checksum.
 *
 * When the buffer is full it is either grown (while smaller than the
 * configured buffer size) or flushed and replaced by a new one.
 *
 * @param b The byte to be written
 * @throws IOException when the stream is closed or the flush fails
 */
@Override
public void write (int b) throws IOException {
    if( closed ) {
        throw new IOException("Can't write into a closed stream");
    }

    final boolean missing = buf == null;
    final boolean full = !missing && !buf.hasRemaining();

    if( full && buf.position() < bufferSize ) {
        // there is still room to grow: keep accumulating in a larger buffer
        buf = expandBuffer(buf);
    }
    else if( missing || full ) {
        if( full ) {
            // the buffer reached its max size: send it out
            flush();
        }
        // start a new buffer with its own checksum
        buf = allocate();
        md5 = createMd5();
    }

    final byte value = (byte) b;
    buf.put(value);
    // update the md5 checksum
    md5.update(value);
}
/**
 * Submits the current buffer for upload. The buffer and its checksum are
 * released only when the buffer was actually handed over; otherwise they
 * keep accumulating data.
 *
 * @throws IOException when the upload cannot be started
 */
@Override
public void flush() throws IOException {
    final boolean handedOver = uploadBuffer(buf, false);
    if( !handedOver ) {
        return;
    }
    // the buffer now belongs to the upload task
    buf = null;
    md5 = null;
}
/**
 * Provides the buffer to write into.
 *
 * Until the first part has been submitted a small 100 KB buffer is returned
 * (it can grow later if more space is needed). Afterwards a buffer is taken
 * from the pool when available, otherwise a new one of the configured size
 * is allocated.
 *
 * @return an empty buffer ready to be written
 */
private ByteBuffer allocate() {
    if( partsCount == 0 ) {
        // this class is expected to be used to upload small files
        // so start small
        return ByteBuffer.allocate(100 * 1024);
    }

    // try to reuse a buffer from the pool
    final ByteBuffer recycled = bufferPool.poll();
    if( recycled != null ) {
        recycled.clear();
        return recycled;
    }

    // nothing to reuse: allocate a new buffer
    log.debug("Allocating new buffer of {} bytes, total buffers {}", bufferSize, bufferCounter.incrementAndGet());
    return ByteBuffer.allocate(bufferSize);
}
/**
 * Submits the given buffer to the executor for asynchronous upload as the
 * next part, initializing the multipart upload on the first submission.
 *
 * Nothing is submitted when the buffer is missing or empty, or when it holds
 * less than the minimum part size and is not the last one.
 *
 * @param buf the buffer to upload
 * @param last {@code true} when this is the last buffer of the stream
 * @return {@code true} if the buffer was submitted (the caller must not write
 *         into it anymore), {@code false} if it still needs to be used
 * @throws IOException when the multipart upload cannot be initialized
 */
private boolean uploadBuffer(ByteBuffer buf, boolean last) throws IOException {
    // when the buffer is empty nothing to do
    if( buf == null ) {
        return false;
    }
    final int filled = buf.position();
    if( filled == 0 ) {
        return false;
    }
    // intermediate uploads need to have at least MIN bytes
    if( !last && filled < MIN_MULTIPART_UPLOAD ) {
        return false;
    }

    if( partsCount == 0 ) {
        init();
    }

    // the buffer is set in read mode by the task itself
    final byte[] checksum = md5.digest();
    final int partNumber = ++partsCount;
    executor.submit( task(buf, checksum, partNumber) );
    return true;
}
/**
 * Initializes the multipart upload: obtains the upload id and sets up the
 * executor, the completed parts queue and the phaser, which starts with one
 * registered party.
 *
 * @throws IOException when no upload id can be obtained
 */
private void init() throws IOException {
    // get the upload id
    uploadId = initiateMultipartUpload().uploadId();
    if( uploadId == null ) {
        throw new IOException("Failed to get a valid multipart upload ID from Amazon S3");
    }

    executor = getOrCreateExecutor(request.getMaxThreads());
    completedParts = new LinkedBlockingQueue<>();
    // one party registered up-front, same as an empty phaser followed by register()
    phaser = new Phaser(1);
    log.trace("[S3 phaser] Register - Starting S3 upload: {}; chunk-size: {}; max-threads: {}", uploadId, bufferSize, request.getMaxThreads());
}
/**
 * Creates the background task uploading one part. A party is registered on
 * the phaser when the task is created and deregistered when the task ends,
 * whatever the outcome. An upload failure is logged, not rethrown.
 *
 * @param buffer The buffer to be uploaded
 * @param checksum The MD5 checksum of the buffer content
 * @param partIndex The part number
 * @return the task to submit to the executor
 */
private Runnable task(final ByteBuffer buffer, final byte[] checksum, final int partIndex) {
    phaser.register();
    log.trace("[S3 phaser] Task register");
    return () -> {
        try {
            uploadPart(buffer, checksum, partIndex, false);
        }
        catch (IOException e) {
            // render the stack trace as text for the log message
            final StringWriter trace = new StringWriter();
            e.printStackTrace(new PrintWriter(trace));
            log.error("Upload: {} > Error for part: {}\nCaused by: {}", uploadId, partIndex, trace.toString());
        }
        finally {
            log.trace("[S3 phaser] Task arriveAndDeregisterphaser");
            phaser.arriveAndDeregister();
        }
    };
}
/**
 * Closes the stream, uploading any remaining buffered data.
 *
 * When no multipart upload was started the data (possibly empty) is stored
 * with a single put request. Otherwise the last buffer is submitted, the
 * pending part uploads are awaited and the multipart upload is completed.
 *
 * If the multipart upload turns out to be aborted once the pending parts
 * have been awaited, the object has not been stored: this is reported with
 * an exception instead of returning as if the data was written.
 *
 * NOTE(review): when a part failure already marked the stream as closed
 * before this method is invoked, the early return below still applies and
 * the failure is not reported here.
 *
 * @throws IOException when the data cannot be stored or the upload was aborted
 */
@Override
public void close() throws IOException {
    if (closed) {
        return;
    }

    if (uploadId == null) {
        if( buf != null ) {
            putObject(buf, md5.digest());
        }
        else {
            // this is needed when trying to upload an empty object
            putObject(new ByteArrayInputStream(new byte[]{}), 0, createMd5().digest());
        }
    }
    else {
        // -- upload remaining chunk
        if( buf != null ) {
            uploadBuffer(buf, true);
        }

        // -- await the termination of the pending part uploads
        log.trace("[S3 phaser] Close arriveAndAwaitAdvance");
        phaser.arriveAndAwaitAdvance();

        // -- a failed part aborts the whole upload: do not report it as a success
        if( aborted ) {
            closed = true;
            throw new IOException("Multipart upload to Amazon S3 was aborted, data not stored for: " + objectId);
        }

        // -- complete upload process
        completeMultipartUpload();
    }

    closed = true;
}
/**
 * Starts the multipart upload process, adding to the request the optional
 * settings (storage class, canned ACL, KMS key, encryption, content type)
 * that have been configured.
 *
 * @return An instance of {@link CreateMultipartUploadResponse}
 * @throws IOException when the request is rejected by the S3 client
 */
private CreateMultipartUploadResponse initiateMultipartUpload() throws IOException {
    final CreateMultipartUploadRequest.Builder builder = CreateMultipartUploadRequest.builder()
            .bucket(objectId.bucket())
            .key(objectId.key());

    // optional settings are added only when configured
    if( storageClass != null ) {
        builder.storageClass(storageClass);
    }
    if( cannedAcl != null ) {
        builder.acl(cannedAcl);
    }
    if( kmsKeyId != null ) {
        builder.ssekmsKeyId(kmsKeyId);
    }
    if( storageEncryption != null ) {
        builder.serverSideEncryption(storageEncryption);
    }
    if( contentType != null ) {
        builder.contentType(contentType);
    }

    final CreateMultipartUploadRequest createRequest = builder.build();
    if( log.isTraceEnabled() ) {
        log.trace("S3 initiateMultipartUpload {}", createRequest);
    }

    try {
        return s3.createMultipartUpload(createRequest);
    }
    catch (final SdkException e) {
        throw new IOException("Failed to initiate Amazon S3 multipart upload", e);
    }
}
/**
 * Uploads the given buffer as one part of the multipart upload, retrying on
 * failure up to the configured number of attempts and sleeping between them.
 *
 * At least one attempt is always made; a non-positive attempts limit does
 * not result in an endless retry loop. When the part cannot be uploaded the
 * stream is marked as closed and the multipart upload is aborted. In any
 * case the buffer is handed back to the pool once done.
 *
 * @param buf The buffer holding the data to upload; it is flipped for reading by this call
 * @param checksum The MD5 checksum of the buffer content
 * @param partNumber The progressive index of this chunk (1-based)
 * @param lastPart {@code true} when it is the last chunk
 * @throws IOException when all the attempts failed
 */
private void uploadPart( final ByteBuffer buf, final byte[] checksum, final int partNumber, final boolean lastPart ) throws IOException {
    // cast to prevent Java 8 / Java 11 cross compile-runtime error
    // https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
    ((java.nio.Buffer)buf).flip();
    // remember the start position to rewind the buffer before a retry
    ((java.nio.Buffer)buf).mark();

    int attempt=0;
    boolean success=false;
    try {
        while( !success ) {
            attempt++;
            int len = buf.limit();
            try {
                log.trace("Uploading part {} with length {} attempt {} for {} ", partNumber, len, attempt, objectId);
                uploadPart( new ByteBufferInputStream(buf), len, checksum , partNumber, lastPart );
                success=true;
            }
            catch (SdkException | IOException e) {
                // `>=` instead of `==`: the latter never matches when the limit is zero or negative
                if( attempt >= request.getMaxAttempts() ) {
                    throw new IOException("Failed to upload multipart data to Amazon S3", e);
                }

                log.debug("Failed to upload part {} attempt {} for {} -- Caused by: {}", partNumber, attempt, objectId, e.getMessage());
                sleep(request.getRetrySleep());
                buf.reset();
            }
        }
    }
    finally {
        if (!success) {
            closed = true;
            abortMultipartUpload();
        }
        // make the buffer available for the next chunks
        bufferPool.offer(buf);
    }
}
/**
 * Sends one part of the multipart upload and records its ETag among the
 * completed parts. Nothing is sent when the upload has been aborted.
 *
 * @param content the part data
 * @param contentLength the number of bytes to send
 * @param checksum the MD5 checksum of the part, sent Base64 encoded
 * @param partNumber the part number
 * @param lastPart {@code true} when it is the last chunk (not used here)
 * @throws IOException declared for the callers retry handling
 */
private void uploadPart(final InputStream content, final long contentLength, final byte[] checksum, final int partNumber, final boolean lastPart)
        throws IOException {
    if (aborted) {
        return;
    }

    final UploadPartRequest partRequest = UploadPartRequest.builder()
            .bucket(objectId.bucket())
            .key(objectId.key())
            .uploadId(uploadId)
            .partNumber(partNumber)
            .contentLength(contentLength)
            .contentMD5(Base64.getEncoder().encodeToString(checksum))
            .build();

    final UploadPartResponse resp = s3.uploadPart(partRequest, RequestBody.fromInputStream(content, contentLength));
    log.trace("Uploaded part {} with length {} for {}: {}", partNumber, contentLength, objectId, resp.eTag());

    final CompletedPart done = CompletedPart.builder()
            .partNumber(partNumber)
            .eTag(resp.eTag())
            .build();
    completedParts.add(done);
}
/**
 * Pauses the current thread for the given time. An interruption ends the
 * pause early; the interrupt status is restored so that the code running on
 * this thread can still detect it.
 *
 * @param millis the time to sleep in milliseconds
 */
private void sleep( long millis ) {
    try {
        Thread.sleep(millis);
    }
    catch (InterruptedException e) {
        log.trace("Sleep was interrupted -- Cause: {}", e.getMessage());
        // catching the exception clears the flag: set it again
        Thread.currentThread().interrupt();
    }
}
/**
 * Aborts the multipart upload process. Only the first invocation has an
 * effect: it sends the abort request (a failure is just logged), marks the
 * upload as aborted and makes one party arrive and deregister on the phaser.
 */
private synchronized void abortMultipartUpload() {
    if (aborted) {
        return;
    }

    log.debug("Aborting multipart upload {} for {}", uploadId, objectId);
    try {
        final AbortMultipartUploadRequest abortRequest = AbortMultipartUploadRequest.builder()
                .bucket(objectId.bucket())
                .key(objectId.key())
                .uploadId(uploadId)
                .build();
        s3.abortMultipartUpload(abortRequest);
    }
    catch (final SdkException e) {
        log.warn("Failed to abort multipart upload {}: {}", uploadId, e.getMessage());
    }

    aborted = true;
    log.trace("[S3 phaser] MultipartUpload arriveAndDeregister");
    phaser.arriveAndDeregister();
}
/**
 * Completes the multipart upload process, sending the list of the uploaded
 * parts sorted by part number. Nothing is done when the upload has been
 * aborted. On success the upload id and the parts list are cleared.
 *
 * @throws IOException when the request is rejected by the S3 client
 */
private void completeMultipartUpload() throws IOException {
    // if aborted upload just ignore it
    if( aborted ) {
        return;
    }

    final int partCount = completedParts.size();
    log.trace("Completing upload to {} consisting of {} parts", objectId, partCount);

    // parts complete in any order: ensure they are sorted by partNumber
    final CompletedPart[] ordered = completedParts.stream()
            .sorted(Comparator.comparingInt(CompletedPart::partNumber))
            .toArray(CompletedPart[]::new);

    try {
        final CompleteMultipartUploadRequest completeRequest = CompleteMultipartUploadRequest.builder()
                .bucket(objectId.bucket())
                .key(objectId.key())
                .uploadId(uploadId)
                .multipartUpload(CompletedMultipartUpload.builder().parts(ordered).build())
                .build();
        s3.completeMultipartUpload(completeRequest);
    }
    catch (final SdkException e) {
        throw new IOException("Failed to complete Amazon S3 multipart upload", e);
    }

    log.trace("Completed upload to {} consisting of {} parts", objectId, partCount);
    uploadId = null;
    completedParts = null;
}
/**
 * Stores the content of the given buffer using a single-part upload process.
 *
 * @param buf the buffer holding the data; it is flipped for reading by this call
 * @param checksum the MD5 checksum of the data
 * @throws IOException when the data cannot be stored
 */
private void putObject(ByteBuffer buf, byte[] checksum) throws IOException {
    // cast to prevent Java 8 / Java 11 cross compile-runtime error
    // https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
    ((java.nio.Buffer)buf).flip();
    final InputStream content = new ByteBufferInputStream(buf);
    // once flipped, the limit is the number of bytes written into the buffer
    final int length = buf.limit();
    putObject(content, length, checksum);
}
/**
 * Uploads the given stream with a single-part upload.
 * <p>
 * The ACL, storage class, tags, KMS key, server-side encryption and content type
 * are added to the request only when the corresponding setting is available.
 *
 * @param content the data to upload
 * @param contentLength the number of bytes provided by {@code content}
 * @param checksum the checksum bytes, sent Base64-encoded as the request content MD5
 * @throws IOException when the S3 client fails to store the object
 */
private void putObject(final InputStream content, final long contentLength, byte[] checksum) throws IOException {
    final PutObjectRequest.Builder builder = PutObjectRequest.builder()
            .bucket(objectId.bucket())
            .key(objectId.key())
            .contentLength(contentLength)
            .contentMD5(Base64.getEncoder().encodeToString(checksum));

    // optional settings
    if( cannedAcl != null )
        builder.acl(cannedAcl);
    if( storageClass != null )
        builder.storageClass(storageClass);
    if( tags != null && !tags.isEmpty() )
        builder.tagging(Tagging.builder().tagSet(tags).build());
    if( kmsKeyId != null )
        builder.ssekmsKeyId(kmsKeyId);
    if( storageEncryption != null )
        builder.serverSideEncryption(storageEncryption);
    if( contentType != null )
        builder.contentType(contentType);

    final PutObjectRequest request = builder.build();
    if( log.isTraceEnabled() )
        log.trace("S3 putObject {}", request);

    try {
        s3.putObject(request, RequestBody.fromInputStream(content, contentLength));
    }
    catch (final SdkException e) {
        throw new IOException("Failed to put data into Amazon S3 object", e);
    }
}
/**
* Returns the current value of the {@code partsCount} counter.
*
* @return Number of uploaded chunks
*/
int getPartsCount() {
return partsCount;
}
/** Shared executor, created lazily by {@link #getOrCreateExecutor(int)} */
static private volatile ExecutorService executorSingleton;

/**
 * Returns the shared executor, creating it on the first invocation.
 *
 * @param maxThreads
 *      The max number of allowed threads in the executor pool.
 *      NOTE: only the value given on the first invocation is used, later values are ignored.
 * @return The executor instance
 */
static synchronized ExecutorService getOrCreateExecutor(int maxThreads) {
    ExecutorService current = executorSingleton;
    if( current != null ) {
        return current;
    }
    current = ThreadPoolManager.create("S3StreamUploader", maxThreads);
    executorSingleton = current;
    return current;
}
}

View File

@@ -0,0 +1,614 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import nextflow.cloud.aws.nio.util.S3ObjectId;
import software.amazon.awssdk.services.s3.model.S3Object;
import software.amazon.awssdk.services.s3.model.Tag;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import nextflow.file.TagAwareFile;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.filter;
import static com.google.common.collect.Iterables.transform;
import static java.lang.String.format;
public class S3Path implements Path, TagAwareFile {
/**
* Separator placed between the bucket and the key segments
*/
public static final String PATH_SEPARATOR = "/";
/**
* bucket name; {@code null} when the path is relative
*/
private final String bucket;
/**
* Parts without bucket name.
*/
private final List<String> parts;
/**
* actual filesystem
*/
private S3FileSystem fileSystem;
// object metadata stored by setObjectSummary and handed out (then cleared) by fetchObject
private S3Object object;
// tags assigned via setTags; null until set
private Map<String,String> tags;
// content type assigned via setContentType; null until set
private String contentType;
// storage class assigned via setStorageClass; null until set
private String storageClass;
/**
* Creates a path from a single string by delegating to the var-args constructor
* with one empty extra segment.
* <p>
* The path must be a string of the form "/{bucket}", "/{bucket}/{key}" or just
* "{key}".
* Examples:
* <ul>
* <li>"/{bucket}//{value}" good, empty key parts are ignored</li>
* <li>"//{key}" error, missing bucket</li>
* <li>"/" error, missing bucket</li>
* </ul>
*
* @param fileSystem the file system this path is bound to
* @param path the path string
*/
public S3Path(S3FileSystem fileSystem, String path) {
this(fileSystem, path, "");
}
/**
 * Builds an S3Path from path segments. '/' are stripped from each segment and
 * empty segments are discarded.
 *
 * @param fileSystem the file system this path is bound to
 * @param first the leading segment(s); when it starts with a '/' the path is absolute
 *      and its first element is the bucket name, otherwise the path is relative
 * @param more additional directories and files; each one may itself contain '/'
 * @throws IllegalArgumentException when {@code first} starts with '/' but carries
 *      no bucket name (e.g. "/" or "//key")
 */
public S3Path(S3FileSystem fileSystem, String first,
              String ... more) {
    List<String> segments = Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(first));
    if (first.endsWith(PATH_SEPARATOR)) {
        // a trailing separator yields a trailing empty segment: drop it
        segments.remove(segments.size()-1);
    }

    String bucket = null;
    if (first.startsWith(PATH_SEPARATOR)) { // absolute path
        // segment 0 is the empty string preceding the leading separator, hence
        // the bucket is segment 1 and at least two segments are required
        Preconditions.checkArgument(segments.size() >= 2,
                "path must start with bucket name");
        Preconditions.checkArgument(!segments.get(1).isEmpty(),
                "bucket name must be not empty");
        bucket = segments.get(1);
        segments = segments.subList(2, segments.size());
    }

    // collect the key segments in a fresh list rather than appending to a sub-list view
    final List<String> keySegments = Lists.newArrayList(segments);
    for (String part : more) {
        keySegments.addAll(Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(part)));
    }

    this.bucket = bucket;
    this.parts = KeyParts.parse(keySegments);
    this.fileSystem = fileSystem;
}
/**
* Creates a path from an already known bucket and a sequence of key segments.
*
* @param fileSystem the file system this path is bound to
* @param bucket the bucket name, or {@code null} for a relative path
* @param keys the key segments; they are cleaned up by {@code KeyParts.parse}
*/
private S3Path(S3FileSystem fileSystem, String bucket,
Iterable<String> keys){
this.bucket = bucket;
this.parts = KeyParts.parse(keys);
this.fileSystem = fileSystem;
}
/**
* @return the bucket name, or {@code null} when this path is relative
*/
public String getBucket() {
return bucket;
}
/**
 * Returns the object key, i.e. the path segments joined by {@link #PATH_SEPARATOR},
 * without a final slash.
 * <b>note:</b> the final slash need to be added to save a directory (Amazon s3 spec)
 *
 * @return the key, or an empty string when the path has no segments
 */
public String getKey() {
    // joining an empty list yields the empty string
    return String.join(PATH_SEPARATOR, parts);
}
/**
* @return a new {@code S3ObjectId} built from the bucket and the key of this path
*/
public S3ObjectId toS3ObjectId() {
return new S3ObjectId(bucket, getKey());
}
/**
* @return the file system given when this path was created
*/
@Override
public S3FileSystem getFileSystem() {
return this.fileSystem;
}
/**
* A path is absolute when it carries a bucket name.
*
* @return {@code true} when the bucket is not {@code null}
*/
@Override
public boolean isAbsolute() {
return bucket != null;
}
/**
 * Returns the root of this path, that is the bucket without any key segment.
 *
 * @return the bucket-only path, or {@code null} when this path is relative
 */
@Override
public Path getRoot() {
    return isAbsolute()
            ? new S3Path(fileSystem, bucket, ImmutableList.<String> of())
            : null;
}
/**
 * Returns the last key segment as a relative path.
 *
 * @return the file name, or {@code null} when the path has no key segments
 *      (a bucket has no file name)
 */
@Override
public Path getFileName() {
    if (parts.isEmpty()) {
        return null;
    }
    final int last = parts.size() - 1;
    return new S3Path(fileSystem, null, parts.subList(last, last + 1));
}
/**
 * Returns this path without its last key segment.
 *
 * @return the parent path, or {@code null} when there are no key segments or when
 *      a path without bucket consists of a single segment
 */
@Override
public Path getParent() {
    // note: the bucket is not listed among the parts
    final int count = parts.size();
    if (count == 0) {
        return null;
    }
    final boolean noBucket = bucket == null || bucket.isEmpty();
    if (noBucket && count == 1) {
        return null;
    }
    return new S3Path(fileSystem, bucket, parts.subList(0, count - 1));
}
/**
* @return the number of key segments; the bucket is not counted
*/
@Override
public int getNameCount() {
return parts.size();
}
/**
* Returns the key segment at the given position as a relative path.
*
* @param index zero-based position of the segment; range checking is left to {@code List.subList}
* @return a relative path holding only that segment
*/
@Override
public Path getName(int index) {
return new S3Path(fileSystem, null, parts.subList(index, index + 1));
}
/**
* Returns the key segments in the given range as a relative path.
*
* @param beginIndex index of the first segment, inclusive
* @param endIndex index of the last segment, exclusive
* @return a relative path made of the selected segments; range checking is left to {@code List.subList}
*/
@Override
public Path subpath(int beginIndex, int endIndex) {
return new S3Path(fileSystem, null, parts.subList(beginIndex, endIndex));
}
/**
 * Tells whether this path begins with the given path.
 * <p>
 * Both paths must have the same bucket (or both none) and the key segments
 * of {@code other} must be a leading run of the segments of this path.
 *
 * @param other the candidate prefix
 * @return {@code true} when {@code other} is an {@code S3Path} and a prefix of this path
 */
@Override
public boolean startsWith(Path other) {
    if (other.getNameCount() > this.getNameCount() || !(other instanceof S3Path)) {
        return false;
    }
    final S3Path that = (S3Path) other;

    // an empty relative path is only a prefix of another empty relative path
    final boolean thatIsEmpty = that.parts.size() == 0 && that.bucket == null;
    final boolean thisIsEmpty = this.parts.size() == 0 && this.bucket == null;
    if (thatIsEmpty && !thisIsEmpty) {
        return false;
    }

    final String thatBucket = that.getBucket();
    final boolean sameBucket = thatBucket == null
            ? this.getBucket() == null
            : thatBucket.equals(this.getBucket());
    if (!sameBucket) {
        return false;
    }

    return this.parts.subList(0, that.parts.size()).equals(that.parts);
}
/**
* Parses the given string as an {@code S3Path} on the same file system and
* delegates to {@link #startsWith(Path)}.
*
* @param path the candidate prefix, in string form
*/
@Override
public boolean startsWith(String path) {
S3Path other = new S3Path(this.fileSystem, path);
return this.startsWith(other);
}
/**
 * Tells whether this path ends with the given path.
 * <p>
 * When {@code other} has a bucket it must match the bucket of this path; the key
 * segments of {@code other} must be the trailing segments of this path.
 *
 * @param other the candidate suffix
 * @return {@code true} when {@code other} is an {@code S3Path} and a suffix of this path
 */
@Override
public boolean endsWith(Path other) {
    final int otherCount = other.getNameCount();
    final int thisCount = this.getNameCount();
    if (otherCount > thisCount) {
        return false;
    }
    // an empty path is not a suffix of a non-empty one
    if (otherCount == 0 && thisCount != 0) {
        return false;
    }
    if (!(other instanceof S3Path)) {
        return false;
    }

    final S3Path that = (S3Path) other;
    final String thatBucket = that.getBucket();
    if (thatBucket != null && !thatBucket.equals(this.getBucket())) {
        return false;
    }

    // compare the tail of this path with all the segments of the other one
    final int offset = this.parts.size() - that.parts.size();
    return this.parts.subList(offset, this.parts.size()).equals(that.parts);
}
/**
* Parses the given string as an {@code S3Path} on the same file system and
* delegates to {@link #endsWith(Path)}.
*
* @param other the candidate suffix, in string form
*/
@Override
public boolean endsWith(String other) {
return this.endsWith(new S3Path(this.fileSystem, other));
}
/**
 * Returns this path with its key segments normalized by {@code normalize0}.
 *
 * @return this very instance when there are no key segments, otherwise a new
 *      path with the same bucket and the normalized segments
 */
@Override
public Path normalize() {
    final boolean nothingToDo = parts == null || parts.size() == 0;
    return nothingToDo
            ? this
            : new S3Path(fileSystem, bucket, normalize0(parts));
}
/**
 * Removes the redundant name elements from the given key segments.
 * <p>
 * "." segments are dropped and a ".." segment cancels the preceding regular
 * segment; a ".." with nothing to cancel is kept. The segments are processed
 * directly, without going through the platform default file system, so the
 * result does not depend on the separator or the legal characters of the host OS.
 *
 * @param parts the key segments to normalize
 * @return the normalized segments, possibly empty
 */
private Iterable<String> normalize0(List<String> parts) {
    final List<String> result = new ArrayList<>(parts.size());
    for (String part : parts) {
        if (".".equals(part)) {
            continue;
        }
        final int last = result.size() - 1;
        if ("..".equals(part) && last >= 0 && !"..".equals(result.get(last))) {
            // step back over the previous regular segment
            result.remove(last);
            continue;
        }
        result.add(part);
    }
    return result;
}
/**
 * Resolves the given path against this one.
 *
 * @param other the path to resolve; must be an {@code S3Path}
 * @return {@code other} when it is absolute, this path when {@code other} is
 *      empty, otherwise a path made of the segments of both
 * @throws IllegalArgumentException when {@code other} is not an {@code S3Path}
 */
@Override
public Path resolve(Path other) {
    Preconditions.checkArgument(other instanceof S3Path,
            "other must be an instance of %s", S3Path.class.getName());
    final S3Path child = (S3Path) other;
    if (child.isAbsolute()) {
        return child;
    }
    // a relative and empty path resolves to this path itself
    return child.parts.isEmpty()
            ? this
            : new S3Path(fileSystem, bucket, concat(parts, child.parts));
}
/**
* Parses the given string as an {@code S3Path} on the same file system and
* delegates to {@link #resolve(Path)}.
*
* @param other the path to resolve, in string form
*/
@Override
public Path resolve(String other) {
return resolve(new S3Path(this.getFileSystem(), other));
}
/**
 * Resolves the given path against the parent of this path.
 *
 * @param other the path to resolve; must be an {@code S3Path}
 * @return {@code other} when it is absolute or this path has no parent, the
 *      parent when {@code other} is empty, otherwise the parent segments
 *      followed by the segments of {@code other}
 * @throws IllegalArgumentException when {@code other} is not an {@code S3Path}
 */
@Override
public Path resolveSibling(Path other) {
    Preconditions.checkArgument(other instanceof S3Path,
            "other must be an instance of %s", S3Path.class.getName());
    final S3Path sibling = (S3Path) other;
    final Path parent = getParent();
    if (parent == null || sibling.isAbsolute()) {
        return sibling;
    }
    if (sibling.parts.isEmpty()) {
        // relative and empty: the result is the parent itself
        return parent;
    }
    final List<String> parentParts = parts.subList(0, parts.size() - 1);
    return new S3Path(fileSystem, bucket, concat(parentParts, sibling.parts));
}
/**
* Parses the given string as an {@code S3Path} on the same file system and
* delegates to {@link #resolveSibling(Path)}.
*
* @param other the path to resolve, in string form
*/
@Override
public Path resolveSibling(String other) {
return resolveSibling(new S3Path(this.getFileSystem(), other));
}
/**
 * Builds the relative path leading from this path to the given one.
 * <p>
 * Only the leading run of segments shared by both paths is skipped. For each
 * remaining segment of this path a ".." element is emitted, followed by the
 * remaining segments of {@code other}. When this path is a prefix of
 * {@code other} the result is simply the trailing segments of {@code other}.
 *
 * @param other the target path; must be an absolute {@code S3Path} in the same bucket
 * @return a relative path; empty when both paths are equal
 * @throws IllegalArgumentException when {@code other} is not an {@code S3Path},
 *      when either path is relative, when the buckets differ, or when this path
 *      has more segments than {@code other}
 */
@Override
public Path relativize(Path other) {
    Preconditions.checkArgument(other instanceof S3Path,
            "other must be an instance of %s", S3Path.class.getName());
    S3Path s3Path = (S3Path) other;
    if (this.equals(other)) {
        return new S3Path(this.getFileSystem(), "");
    }
    Preconditions.checkArgument(isAbsolute(),
            "Path is already relative: %s", this);
    Preconditions.checkArgument(s3Path.isAbsolute(),
            "Cannot relativize against a relative path: %s", s3Path);
    Preconditions.checkArgument(bucket.equals(s3Path.getBucket()),
            "Cannot relativize paths with different buckets: '%s', '%s'",
            this, other);
    Preconditions.checkArgument(parts.size() <= s3Path.parts.size(),
            "Cannot relativize against a parent path: '%s', '%s'",
            this, other);

    // length of the common prefix: stop at the first mismatch, a match found
    // after a mismatch must not be counted
    final int ownSize = this.parts.size();
    int common = 0;
    while (common < ownSize && this.parts.get(common).equals(s3Path.parts.get(common))) {
        common++;
    }

    List<String> resultParts = new ArrayList<>();
    // climb out of the segments of this path that are not shared with the target
    for (int i = common; i < ownSize; i++) {
        resultParts.add("..");
    }
    resultParts.addAll(s3Path.parts.subList(common, s3Path.parts.size()));
    return new S3Path(fileSystem, null, resultParts);
}
/**
 * Renders this path as a URI of the form {@code s3://<endpoint>/<bucket>/<key>},
 * where the endpoint is omitted when the file system reports none.
 *
 * @return the URI parsed by {@link URI#create(String)}
 */
@Override
public URI toUri() {
    String text = "s3://";
    if (fileSystem.getEndpoint() != null) {
        text += fileSystem.getEndpoint();
    }
    text += "/" + bucket + PATH_SEPARATOR + String.join(PATH_SEPARATOR, parts);
    return URI.create(text);
}
/**
 * Returns this path when it is already absolute.
 *
 * @return this path
 * @throws IllegalStateException when this path is relative
 */
@Override
public Path toAbsolutePath() {
    if (!isAbsolute()) {
        throw new IllegalStateException(format(
                "Relative path cannot be made absolute: %s", this));
    }
    return this;
}
/**
* Not supported by this implementation.
*
* @throws UnsupportedOperationException always
*/
@Override
public Path toRealPath(LinkOption... options) throws IOException {
throw new UnsupportedOperationException();
}
/**
* Not supported by this implementation.
*
* @throws UnsupportedOperationException always
*/
@Override
public File toFile() {
throw new UnsupportedOperationException();
}
/**
* Not supported by this implementation.
*
* @throws UnsupportedOperationException always
*/
@Override
public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events,
WatchEvent.Modifier... modifiers) throws IOException {
throw new UnsupportedOperationException();
}
/**
* Not supported by this implementation.
*
* @throws UnsupportedOperationException always
*/
@Override
public WatchKey register(WatchService watcher, WatchEvent.Kind<?>... events)
throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Iterates over the key segments, each one exposed as a relative single-segment path.
 *
 * @return an iterator over an immutable snapshot of the segments
 */
@Override
public Iterator<Path> iterator() {
    final ImmutableList.Builder<Path> names = ImmutableList.builder();
    for (String part : parts) {
        names.add(new S3Path(fileSystem, null, ImmutableList.of(part)));
    }
    return names.build().iterator();
}
/**
* Compares the two paths by the lexicographic order of their string forms.
*
* @param other the path to compare with
*/
@Override
public int compareTo(Path other) {
return toString().compareTo(other.toString());
}
/**
 * Renders the path as {@code /<bucket>/<key>} when absolute, or as the bare
 * key when relative.
 */
@Override
public String toString() {
    final String key = Joiner.on(PATH_SEPARATOR).join(parts);
    return isAbsolute()
            ? PATH_SEPARATOR + bucket + PATH_SEPARATOR + key
            : key;
}
/**
 * Two paths are equal when they are of the same class and have the same bucket
 * and the same key segments. The file system is not taken into account.
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null || getClass() != o.getClass()) {
        return false;
    }
    final S3Path that = (S3Path) o;
    final boolean sameBucket = bucket == null
            ? that.bucket == null
            : bucket.equals(that.bucket);
    return sameBucket && parts.equals(that.parts);
}
/**
 * Hash code derived from the bucket and the key segments, consistent with
 * {@link #equals(Object)}.
 */
@Override
public int hashCode() {
    final int bucketHash = bucket == null ? 0 : bucket.hashCode();
    return 31 * bucketHash + parts.hashCode();
}
/**
* This method returns the cached {@link S3Object} instance if this path has been created
* while iterating a directory structure by the {@link S3Iterator}.
* <br>
* After calling this method the cached object is reset, so any following method invocation will return {@code null}.
* This is necessary to discard the object meta-data and force to reload file attributes when required.
*
* @return The cached {@link S3Object} for this path if any, otherwise {@code null}.
*/
public S3Object fetchObject() {
// hand out the cached value once, then forget it
S3Object result = object;
object = null;
return result;
}
/**
* Stores the object metadata later returned by {@link #fetchObject()}.
* Note: package scope to limit the access to this setter.
*
* @param objectSummary the object metadata to cache on this path
*/
void setObjectSummary(S3Object objectSummary) {
this.object = objectSummary;
}
/**
* Sets the tags attached to this path; they are exposed by {@link #getTagsList()}.
*
* @param tags tag names mapped to tag values
*/
@Override
public void setTags(Map<String,String> tags) {
this.tags = tags;
}
/**
* Sets the content type attached to this path.
*
* @param type the content type value
*/
@Override
public void setContentType(String type) {
this.contentType = type;
}
/**
* Sets the storage class attached to this path.
*
* @param storageClass the storage class value
*/
@Override
public void setStorageClass(String storageClass) {
this.storageClass = storageClass;
}
/**
 * Converts the tags assigned to this path into a list of {@link Tag} objects.
 *
 * @return one {@code Tag} per map entry, or an empty list when no tags were set
 */
public List<Tag> getTagsList() {
    if( tags == null ) {
        // nothing to convert
        return Collections.emptyList();
    }
    final List<Tag> result = new ArrayList<>();
    tags.forEach((key, value) -> result.add(Tag.builder().key(key).value(value).build()));
    return result;
}
/**
* @return the content type given to {@link #setContentType(String)}, or {@code null} when never set
*/
public String getContentType() {
return contentType;
}
/**
* @return the storage class given to {@link #setStorageClass(String)}, or {@code null} when never set
*/
public String getStorageClass() {
return storageClass;
}
// ~ helpers methods

/**
 * Creates a function removing every occurrence of the given strings from its input.
 *
 * @param strs the strings to remove
 */
private static Function<String, String> strip(final String ... strs) {
    return input -> {
        String cleaned = input;
        for (String token : strs) {
            cleaned = cleaned.replace(token, "");
        }
        return cleaned;
    };
}
/**
 * Creates a predicate accepting only strings that are neither {@code null} nor empty.
 */
private static Predicate<String> notEmpty() {
    return input -> input != null && !input.isEmpty();
}
/*
 * Cleans up key segments: "/" characters are removed from each
 * segment and the segments left empty are discarded
 */
private abstract static class KeyParts {

    private static ImmutableList<String> parse(List<String> parts) {
        return parse((Iterable<String>) parts);
    }

    private static ImmutableList<String> parse(Iterable<String> parts) {
        final Iterable<String> withoutSlashes = transform(parts, strip("/"));
        return ImmutableList.copyOf(filter(withoutSlashes, notEmpty()));
    }
}
/**
 * Extracts the bucket name, i.e. the first segment of the URI path.
 *
 * @param uri the URI to inspect; its path must start with a '/'
 * @return the first path segment, or {@code null} when the path holds no segment
 * @throws IllegalArgumentException when the URI has no path or the path does not start with '/'
 */
public static String bucketName(URI uri) {
    final String path = uri.getPath();
    final boolean valid = path != null && path.startsWith("/");
    if( !valid ) {
        throw new IllegalArgumentException("Invalid S3 path: " + uri);
    }
    // the leading slash produces an empty first element, the bucket is the second one
    final String[] segments = path.split("/");
    if( segments.length <= 1 ) {
        return null;
    }
    return segments[1];
}
}

View File

@@ -0,0 +1,99 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
/**
 * Hold a buffer for transfer a remote object chunk.
 * <p>
 * The data is kept in a direct {@link ByteBuffer}: it is written with
 * {@link #fill(InputStream)} or {@link #writeByte(int)}, switched to read mode with
 * {@link #makeReadable()} and then consumed with {@link #getByte()} or
 * {@link #getBytes(byte[], int, int)}. Buffers are ordered by their index.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Deprecated
public class ChunkBuffer implements Comparable<ChunkBuffer> {

    /** Size of the temporary array used to drain an input stream */
    private static final int BUFFER_SIZE = 8192;

    private final ByteBuffer target;

    /** The factory this buffer is given back to on {@link #release()} */
    private final ChunkBufferFactory owner;

    private final int index;

    ChunkBuffer(ChunkBufferFactory owner, int capacity, int index) {
        this.owner = owner;
        this.target = ByteBuffer.allocateDirect(capacity);
        this.index = index;
    }

    int getIndex() {
        return index;
    }

    /**
     * @return the next byte as an unsigned value in the range 0-255
     */
    int getByte() {
        return target.get() & 0xFF;
    }

    void writeByte(int ch) {
        target.put((byte)ch);
    }

    /**
     * Copies the whole content of the given stream into this buffer.
     *
     * @param stream the stream to read until its end
     * @throws IOException when reading from the stream fails
     */
    void fill(InputStream stream) throws IOException {
        int n;
        byte[] b = new byte[BUFFER_SIZE];
        while ((n = stream.read(b)) != -1 ) {
            target.put(b, 0, n);
        }
    }

    void makeReadable() {
        // cast to prevent Java 8 / Java 11 cross compile-runtime error
        // https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
        ((java.nio.Buffer)target).flip();
    }

    void clear() {
        // cast to prevent Java 8 / Java 11 cross compile-runtime error
        // https://www.morling.dev/blog/bytebuffer-and-the-dreaded-nosuchmethoderror/
        ((java.nio.Buffer)target).clear();
    }

    /**
     * Copies up to {@code len} bytes from this buffer into the given array.
     *
     * @param buff the destination array
     * @param off the position in {@code buff} of the first byte written
     * @param len the maximum number of bytes to copy
     * @return the number of bytes copied, {@code 0} when nothing is left or {@code len} is not positive
     */
    int getBytes( byte[] buff, int off, int len ) {
        final int count = Math.min(len, target.remaining());
        if( count <= 0 ) {
            return 0;
        }
        // single bulk transfer instead of one get() call per byte
        target.get(buff, off, count);
        return count;
    }

    boolean hasRemaining() {
        return target.hasRemaining();
    }

    /**
     * Gives this buffer back to the factory that created it.
     */
    public void release() {
        owner.giveBack(this);
    }

    @Override
    public int compareTo(ChunkBuffer other) {
        return Integer.compare(index, other.index);
    }
}

View File

@@ -0,0 +1,80 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Creates the {@link ChunkBuffer} instances used to download chunks and keeps
 * the released ones in a bounded pool so they can be reused.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
public class ChunkBufferFactory {

    final Logger log = LoggerFactory.getLogger(ChunkBufferFactory.class);

    /** Released buffers waiting to be reused */
    final private BlockingQueue<ChunkBuffer> pool;

    /** Counter providing the index of newly created buffers */
    final private AtomicInteger count;

    private final int chunkSize;

    private final int capacity;

    /**
     * @param chunkSize the capacity of each buffer created by this factory
     * @param capacity the max number of released buffers held by the pool
     */
    public ChunkBufferFactory(int chunkSize, int capacity) {
        this.chunkSize = chunkSize;
        this.capacity = capacity;
        this.pool = new ArrayBlockingQueue<>(capacity);
        this.count = new AtomicInteger();
    }

    /**
     * Provides a buffer, reusing a pooled one when it becomes available within
     * 100 milliseconds and allocating a new one otherwise.
     *
     * @return a cleared pooled buffer or a brand new one
     * @throws InterruptedException when interrupted while waiting on the pool
     */
    public ChunkBuffer create() throws InterruptedException {
        final ChunkBuffer recycled = pool.poll(100, TimeUnit.MILLISECONDS);
        if( recycled == null ) {
            // nothing was released in the meantime: allocate a new buffer
            final int indx = count.getAndIncrement();
            if( log.isTraceEnabled() )
                log.trace("Creating a new buffer index={}; capacity={}", indx, capacity);
            return new ChunkBuffer(this, chunkSize, indx);
        }
        recycled.clear();
        return recycled;
    }

    /**
     * Takes back a buffer: it is added to the pool when there is room, otherwise
     * it is dropped and the counter is decremented.
     */
    void giveBack(ChunkBuffer buffer) {
        final boolean pooled = pool.offer(buffer);
        if( !pooled ) {
            final int cc = count.decrementAndGet();
            if( log.isTraceEnabled() )
                log.trace("Returning buffer index={} for GC; pool size={}; count={}", buffer.getIndex(), pool.size(), cc);
            return;
        }
        if( log.isTraceEnabled() )
            log.trace("Returning buffer {} to pool size={}", buffer.getIndex(), pool.size());
    }

    int getPoolSize() {
        return pool.size();
    }
}

View File

@@ -0,0 +1,111 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import nextflow.util.Duration;
import nextflow.util.MemoryUnit;
/**
 * Model S3 download options.
 * <p>
 * Each option is read from the given properties first, then from the given
 * environment map, and finally falls back to a built-in default.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
public class DownloadOpts {

    final private boolean parallelEnabled;
    private final int queueMaxSize;
    private final int numWorkers;
    private final MemoryUnit chunkSize;
    private final MemoryUnit bufferMaxSize;
    private final int maxAttempts;
    private final Duration maxDelay;

    /** Creates the options using only the default values */
    DownloadOpts() {
        this(new Properties(), Collections.emptyMap());
    }

    /** Creates the options from the given map, ignoring the environment */
    DownloadOpts(Map<?,?> opts) {
        this(props(opts), Collections.emptyMap());
    }

    static private Properties props(Map<?,?> opts) {
        Properties result = new Properties();
        result.putAll(opts);
        return result;
    }

    /**
     * @param props the configuration properties, looked up by the {@code download_*} keys
     * @param env the environment variables, looked up by the {@code NXF_S3_DOWNLOAD_*} names
     */
    DownloadOpts(Properties props, Map<String,String> env) {
        this.parallelEnabled = Boolean.parseBoolean(lookup(props, env, "download_parallel", "NXF_S3_DOWNLOAD_PARALLEL"));
        this.queueMaxSize = intOption(props, env, "download_queue_max_size", "NXF_S3_DOWNLOAD_QUEUE_SIZE", 10_000);
        this.numWorkers = intOption(props, env, "download_num_workers", "NXF_S3_DOWNLOAD_NUM_WORKERS", 10);
        this.chunkSize = memoryOption(props, env, "download_chunk_size", "NXF_S3_DOWNLOAD_CHUNK_SIZE", "10 MB");
        this.bufferMaxSize = memoryOption(props, env, "download_buffer_max_size", "NXF_S3_DOWNLOAD_BUFFER_MAX_MEM", "1 GB");
        this.maxAttempts = intOption(props, env, "download_max_attempts", "NXF_S3_DOWNLOAD_MAX_ATTEMPTS", 5);
        final String delay = lookup(props, env, "download_max_delay", "NXF_S3_DOWNLOAD_MAX_DELAY");
        this.maxDelay = Duration.of(delay != null ? delay : "90s");
    }

    /**
     * Finds the raw value of an option, the properties taking precedence over the environment.
     *
     * @return the value as a string, or {@code null} when it is defined in neither source
     */
    static private String lookup(Properties props, Map<String,String> env, String propKey, String envKey) {
        // read the entry with `get`: `getProperty` yields null for values that are
        // not strings (e.g. a number taken from a config map)
        final Object fromProps = props.get(propKey);
        if( fromProps != null ) {
            return fromProps.toString();
        }
        return env.get(envKey);
    }

    /** Resolves an integer option, falling back to the given default when not defined */
    static private int intOption(Properties props, Map<String,String> env, String propKey, String envKey, int defValue) {
        final String value = lookup(props, env, propKey, envKey);
        return value != null ? Integer.parseInt(value) : defValue;
    }

    /** Resolves a memory option, falling back to the given default when not defined */
    static private MemoryUnit memoryOption(Properties props, Map<String,String> env, String propKey, String envKey, String defValue) {
        final String value = lookup(props, env, propKey, envKey);
        return MemoryUnit.of(value != null ? value : defValue);
    }

    /** Creates the options from the given properties and the process environment */
    static public DownloadOpts from(Properties props) {
        return from(props, System.getenv());
    }

    static public DownloadOpts from(Properties props, Map<String,String> env) {
        return new DownloadOpts(props, env);
    }

    public boolean parallelEnabled() { return parallelEnabled; }

    @Deprecated public int queueMaxSize() { return queueMaxSize; }

    public MemoryUnit chunkSizeMem() { return chunkSize; }

    /** @return the chunk size in bytes, narrowed to an {@code int} */
    public int chunkSize() { return (int)chunkSize.toBytes(); }

    public MemoryUnit bufferMaxSize() { return bufferMaxSize; }

    public int numWorkers() { return numWorkers; }

    public long maxDelayMillis() {
        return maxDelay.getMillis();
    }

    public int maxAttempts() {
        return maxAttempts;
    }

    @Override
    public String toString() {
        return String.format("workers=%s; chunkSize=%s; queueSize=%s; max-mem=%s; maxAttempts=%s; maxDelay=%s", numWorkers, chunkSize, queueMaxSize, bufferMaxSize, maxAttempts, maxDelay);
    }
}

View File

@@ -0,0 +1,95 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.util.Iterator;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
 * Implements an input stream emitting a collection of futures {@link ChunkBuffer}.
 * <p>
 * The chunks are consumed in the order provided by the iterator. Each chunk is
 * released as soon as it has been fully read or when the stream is closed.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
public class FutureInputStream extends InputStream {

    private final Iterator<Future<ChunkBuffer>> futures;

    /** The chunk currently being read, {@code null} when none is held */
    private ChunkBuffer buffer;

    FutureInputStream(Iterator<Future<ChunkBuffer>> futures) {
        this.futures = futures;
    }

    @Override
    public int read() throws IOException {
        return ensureBuffer() ? buffer.getByte() : -1;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if( len == 0 ) {
            // as for the InputStream contract nothing is read and 0 is returned
            return 0;
        }
        return ensureBuffer() ? buffer.getBytes(b, off, len) : -1;
    }

    /**
     * Makes sure the current buffer has data left, moving to the following
     * chunks when needed. Chunks that are missing or hold no data are skipped.
     *
     * @return {@code false} when all the chunks have been consumed
     */
    private boolean ensureBuffer() throws IOException {
        while( buffer == null || !buffer.hasRemaining() ) {
            freeBuffer();
            if( !futures.hasNext() ) {
                return false;
            }
            buffer = nextBuffer();
        }
        return true;
    }

    private ChunkBuffer nextBuffer() throws IOException {
        try {
            return futures.next().get();
        }
        catch (ExecutionException e) {
            throw new IOException("Failed to acquire stream chunk", e);
        }
        catch (InterruptedException e) {
            // keep the interruption visible to the caller of this stream
            Thread.currentThread().interrupt();
            throw new InterruptedIOException("Interrupted while waiting for stream chunk");
        }
    }

    private void freeBuffer() {
        if( buffer!=null ) {
            buffer.release();
            buffer=null;
        }
    }

    /**
     * Releases the chunk currently held, if any.
     */
    @Override
    public void close() {
        freeBuffer();
    }
}

View File

@@ -0,0 +1,88 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.function.Function;
/**
 * Implements an iterator that progressively submit a collection of tasks to the
 * specified executor and iterates over the responses returned as {@link Future}.
 * <p>
 * Up to {@code initialSize} tasks are submitted on creation; afterwards one more
 * task is submitted each time a future is taken with {@link #next()}. The
 * futures are returned in the same order as the requests.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 * @author Jordi Deu-Pons <jordi@seqera.io>
 */
public class FutureIterator<REQ,RESP> implements Iterator<Future<RESP>> {

    final private ExecutorService executor;
    final private Iterator<REQ> parts;
    final private Queue<Future<RESP>> futures = new LinkedList<>();
    final private Function<REQ, RESP> task;
    final private int initialSize;

    /**
     * @param parts the requests to process
     * @param task the function turning a request into a response
     * @param executor the executor running the tasks
     * @param initialSize the max number of tasks submitted upfront
     */
    FutureIterator(List<REQ> parts, Function<REQ, RESP> task, ExecutorService executor, int initialSize) {
        this.parts = parts.iterator();
        this.task = task;
        this.executor = executor;
        this.initialSize = initialSize;
        init();
    }

    private void init() {
        // submit up to `initialSize` parts on start
        int submitted = 0;
        while (parts.hasNext() && submitted++ < initialSize ) {
            submitNext();
        }
    }

    /**
     * Submits the task for the next request. When the submission fails all the
     * pending tasks are cancelled and the error is propagated.
     */
    private void submitNext() {
        try {
            // note: making `parts.next()` inline in the lambda causes to delay
            // the evaluate in a separate thread causing concurrency problems
            final REQ req = parts.next();
            futures.add(executor.submit( () -> task.apply(req) ));
        }
        catch (RuntimeException | Error e) {
            // in case of error cancel all pending tasks
            for( Future<RESP> it : futures ) {
                it.cancel(true);
            }
            throw e;
        }
    }

    @Override
    public boolean hasNext() {
        return !futures.isEmpty() || parts.hasNext();
    }

    /**
     * @return the future of the oldest submitted task not yet returned
     * @throws java.util.NoSuchElementException when all the futures have been returned
     */
    @Override
    public Future<RESP> next() {
        if( !hasNext() ) {
            throw new java.util.NoSuchElementException("No more tasks to iterate over");
        }
        // keep busy the workers adding a new task each time one is consumed
        if( parts.hasNext() ) {
            submitNext();
        }
        return futures.poll();
    }
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
/**
* @author Paolo Di Tommaso paolo.ditommaso@gmail.com
*/
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
/**
 * An {@code InputStream} adaptor which reads data from a {@code ByteBuffer}.
 * <p>
 * Reading consumes the buffer: its position advances by the number of bytes returned.
 *
 * See http://stackoverflow.com/a/6603018/395921
 *
 * @author Paolo Di Tommaso paolo.ditommaso@gmail.com
 */
public class ByteBufferInputStream extends InputStream {

    ByteBuffer buf;

    /**
     * @param buf the buffer to read from, starting at its current position
     */
    public ByteBufferInputStream(ByteBuffer buf) {
        this.buf = buf;
    }

    /**
     * @return the next byte as a value in the range 0-255, or {@code -1} when the buffer is exhausted
     */
    @Override
    public int read() throws IOException {
        if (!buf.hasRemaining()) {
            return -1;
        }
        return buf.get() & 0xFF;
    }

    /**
     * @return the number of bytes copied, {@code 0} when {@code len} is zero,
     *      or {@code -1} when the buffer is exhausted
     */
    @Override
    public int read(byte[] bytes, int off, int len) throws IOException {
        if (len == 0) {
            // as for the InputStream contract nothing is read and 0 is returned
            return 0;
        }
        if (!buf.hasRemaining()) {
            return -1;
        }
        final int count = Math.min(len, buf.remaining());
        buf.get(bytes, off, count);
        return count;
    }

    /**
     * @return the number of bytes left in the buffer
     */
    @Override
    public int available() throws IOException {
        return buf.remaining();
    }
}

View File

@@ -0,0 +1,168 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import java.util.Properties;
import java.util.concurrent.Semaphore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.transfer.s3.S3TransferManager;
import software.amazon.awssdk.transfer.s3.model.Copy;
import software.amazon.awssdk.transfer.s3.model.CopyRequest;
import software.amazon.awssdk.transfer.s3.model.DirectoryUpload;
import software.amazon.awssdk.transfer.s3.model.DownloadFileRequest;
import software.amazon.awssdk.transfer.s3.model.FileDownload;
import software.amazon.awssdk.transfer.s3.model.FileUpload;
import software.amazon.awssdk.transfer.s3.model.UploadDirectoryRequest;
import software.amazon.awssdk.transfer.s3.model.UploadFileRequest;
import static nextflow.cloud.aws.config.AwsS3Config.*;
/**
* Extends the S3 Transfer Manager with semaphores to limit concurrent
* transfers based on available resources.
*
* Copies and uploads are limited based on the `maxConnections` setting.
*
* Downloads are limited based on the `maxDownloadHeapMemory` setting. The
* CRT client allocates a buffer of 10 * part size for each transfer by default.
*
* @see <a href="https://github.com/aws/aws-sdk-java-v2/issues/6323">aws-sdk-java-v2 issue #6323</a>
*
* @author Jorge Ejarque (jorge.ejarque@seqera.io)
*/
public class ExtendedS3TransferManager {

    private static final Logger log = LoggerFactory.getLogger(ExtendedS3TransferManager.class);

    /** Delegate that performs the actual transfers. */
    private S3TransferManager transferManager;

    /** Limits the number of concurrent uploads and copies. */
    private Semaphore semaphore;

    /** Part size (bytes) used to estimate how many download permits a file needs. */
    private long partSize;

    /** Total number of permits held by {@link #downloadSemaphore}. */
    private int downloadPermits;

    /** Limits concurrent downloads; one permit stands for one part-sized buffer. */
    private Semaphore downloadSemaphore;

    /**
     * Wraps the given transfer manager, sizing the semaphores from the client properties.
     *
     * @param transferManager the transfer manager that executes the requests
     * @param props client properties; reads {@code max_connections},
     *              {@code max_download_heap_memory} and {@code minimum_part_size}
     */
    public ExtendedS3TransferManager( S3TransferManager transferManager, Properties props){
        this.transferManager = transferManager;
        setDefaultSemaphore(props);
        setDownloadSemaphore(props);
    }

    /** Creates the upload/copy semaphore from {@code max_connections} (100 when not set). */
    private void setDefaultSemaphore(Properties props) {
        int permits = 100;
        if( props.containsKey("max_connections")) {
            permits = Integer.parseInt(props.getProperty("max_connections"));
        }
        this.semaphore = new Semaphore(permits);
    }

    /**
     * Creates the download semaphore with one permit per part-sized slice of the
     * maximum download buffer ({@code max_download_heap_memory / minimum_part_size}).
     */
    private void setDownloadSemaphore(Properties props) {
        long maxBufferSize = DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE;
        if( props.containsKey("max_download_heap_memory")) {
            log.trace("AWS client config - max_download_heap_memory: {}", props.getProperty("max_download_heap_memory"));
            maxBufferSize = Long.parseLong(props.getProperty("max_download_heap_memory"));
        }
        this.partSize = DEFAULT_PART_SIZE;
        if( props.containsKey("minimum_part_size")) {
            log.trace("AWS client config - minimum_part_size: {}", props.getProperty("minimum_part_size"));
            this.partSize = Long.parseLong(props.getProperty("minimum_part_size"));
        }
        final int permits = (int) Math.floor((double) maxBufferSize / partSize);
        if( permits < 1 ) {
            // A semaphore without permits would block every download forever
            log.warn("AWS client config - max_download_heap_memory ({}) is smaller than minimum_part_size ({}) -- Downloads will be limited to a single part at a time", maxBufferSize, partSize);
        }
        this.downloadPermits = Math.max(1, permits);
        this.downloadSemaphore = new Semaphore(downloadPermits);
    }

    /** @return the part size in bytes used to estimate download permits */
    public long getPartSize() {
        return partSize;
    }

    /** @return the total number of download permits */
    public int getDownloadPermits() {
        return downloadPermits;
    }

    /**
     * Submits a download, blocking until enough download permits are available.
     * The permits are released when the transfer completes, successfully or not.
     *
     * @param request the download request
     * @param size the size in bytes of the object to download, used to estimate the permits
     * @return the handle of the submitted download
     * @throws InterruptedException if the thread is interrupted while waiting for permits
     */
    public FileDownload downloadFile(DownloadFileRequest request, long size) throws InterruptedException {
        // Never ask for more permits than the semaphore holds in total: such a
        // request could never be satisfied and the caller would block forever.
        final int parts = Math.max(1, Math.min(estimateParts(size), downloadPermits));
        FileDownload fileDownload;
        downloadSemaphore.acquire(parts);
        try {
            fileDownload = transferManager.downloadFile(request);
        } catch (Throwable e) {
            // Release the permits when the submission itself fails
            downloadSemaphore.release(parts);
            throw e;
        }
        // Ensure permits are always released after completion
        fileDownload
            .completionFuture()
            .whenComplete((result, error) -> downloadSemaphore.release(parts));
        return fileDownload;
    }

    /**
     * Estimates how many part-sized buffers a download of the given size needs.
     *
     * @param size the object size in bytes
     * @return {@code 1} for a non-positive size, otherwise {@code ceil(size / partSize)}
     *         capped at {@code DEFAULT_INIT_BUFFER_PARTS}
     */
    protected int estimateParts(long size) {
        if (size <= 0)
            return 1;
        int parts = (int) Math.ceil((double) size / partSize);
        return Math.min(parts, DEFAULT_INIT_BUFFER_PARTS);
    }

    /**
     * Submits a file upload, blocking until a transfer permit is available.
     *
     * @param request the upload request
     * @return the handle of the submitted upload
     * @throws InterruptedException if the thread is interrupted while waiting for a permit
     */
    public FileUpload uploadFile(UploadFileRequest request) throws InterruptedException {
        FileUpload fileUpload;
        semaphore.acquire();
        try {
            fileUpload = transferManager.uploadFile(request);
        } catch (Throwable e) {
            // Release the permit when the submission itself fails
            semaphore.release();
            throw e;
        }
        fileUpload
            .completionFuture()
            .whenComplete((result, error) -> semaphore.release());
        return fileUpload;
    }

    /**
     * Submits a directory upload, blocking until a transfer permit is available.
     * The whole directory upload holds a single permit.
     *
     * @param request the directory upload request
     * @return the handle of the submitted upload
     * @throws InterruptedException if the thread is interrupted while waiting for a permit
     */
    public DirectoryUpload uploadDirectory(UploadDirectoryRequest request) throws InterruptedException {
        DirectoryUpload directoryUpload;
        semaphore.acquire();
        try {
            directoryUpload = transferManager.uploadDirectory(request);
        } catch (Throwable e) {
            // Release the permit when the submission itself fails
            semaphore.release();
            throw e;
        }
        directoryUpload
            .completionFuture()
            .whenComplete((result, error) -> semaphore.release());
        return directoryUpload;
    }

    /**
     * Submits a copy, blocking until a transfer permit is available.
     *
     * @param request the copy request
     * @return the handle of the submitted copy
     * @throws InterruptedException if the thread is interrupted while waiting for a permit
     */
    public Copy copy(CopyRequest request) throws InterruptedException {
        Copy copy;
        semaphore.acquire();
        try {
            copy = transferManager.copy(request);
        } catch (Throwable e) {
            // Release the permit when the submission itself fails
            semaphore.release();
            throw e;
        }
        copy
            .completionFuture()
            .whenComplete((result, error) -> semaphore.release());
        return copy;
    }
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* Utilities for streams
*/
public abstract class IOUtils {

    /**
     * Drains the given stream until end-of-stream and returns everything that was read.
     *
     * @param is the stream to consume; it is not closed by this method
     * @return the bytes read from the stream, empty when the stream has no data
     * @throws IOException if reading from the stream fails
     */
    public static byte[] toByteArray(InputStream is) throws IOException {
        final byte[] chunk = new byte[16384];
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // read() returns -1 once the end of the stream is reached
        for (int count = is.read(chunk, 0, chunk.length); count != -1; count = is.read(chunk, 0, chunk.length)) {
            sink.write(chunk, 0, count);
        }
        sink.flush();
        return sink.toByteArray();
    }
}

View File

@@ -0,0 +1,170 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import software.amazon.awssdk.services.s3.crt.S3CrtConnectionHealthConfiguration;
import software.amazon.awssdk.services.s3.crt.S3CrtProxyConfiguration;
import software.amazon.awssdk.services.s3.crt.S3CrtHttpConfiguration;
import software.amazon.awssdk.services.s3.crt.S3CrtRetryConfiguration;
import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration;
import java.time.Duration;
import java.util.Properties;
/**
* Class to convert Amazon properties in S3 asynchronous client configuration
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
public class S3AsyncClientConfiguration extends S3ClientConfiguration {

    /** Socket timeout applied when {@code socket_timeout} is not specified. */
    private static final long DEFAULT_SOCKET_TIMEOUT_MS = 30_000L;

    // Builders are created lazily, so the matching getter returns null
    // when no related property has been provided.
    private S3CrtHttpConfiguration.Builder crtHttpConfiguration;
    private MultipartConfiguration.Builder multiPartBuilder;

    private S3CrtRetryConfiguration crtRetryConfiguration;
    private Integer maxConcurrency ;
    private Double targetThroughputInGbps;
    private Long maxNativeMemoryInBytes;

    private S3AsyncClientConfiguration() {
        super();
    }

    /**
     * Creates the asynchronous client configuration from the given properties.
     *
     * @param props the client properties; when {@code null} an empty configuration is returned
     * @return the resulting configuration
     */
    public static S3AsyncClientConfiguration create(Properties props) {
        final S3AsyncClientConfiguration result = new S3AsyncClientConfiguration();
        if( props == null )
            return result;
        result.setClientOverrideConfiguration(props);
        result.setAsyncConfiguration(props);
        return result;
    }

    /** Returns the CRT HTTP configuration builder, creating it on first use. */
    private S3CrtHttpConfiguration.Builder crtHttpConfiguration() {
        if( this.crtHttpConfiguration != null )
            return this.crtHttpConfiguration;
        this.crtHttpConfiguration = S3CrtHttpConfiguration.builder();
        return this.crtHttpConfiguration;
    }

    /** Returns the multipart configuration builder, creating it on first use. */
    private MultipartConfiguration.Builder multipartBuilder() {
        if( this.multiPartBuilder != null )
            return this.multiPartBuilder;
        this.multiPartBuilder = MultipartConfiguration.builder();
        return this.multiPartBuilder;
    }

    /** @return the CRT HTTP configuration, or {@code null} when none has been set up */
    public S3CrtHttpConfiguration getCrtHttpConfiguration() {
        return this.crtHttpConfiguration != null ? this.crtHttpConfiguration.build() : null;
    }

    /** @return the multipart configuration, or {@code null} when none has been set up */
    public MultipartConfiguration getMultipartConfiguration() {
        return this.multiPartBuilder != null ? this.multiPartBuilder.build() : null;
    }

    /** @return the CRT retry configuration, or {@code null} when {@code max_error_retry} is not set */
    public S3CrtRetryConfiguration getCrtRetryConfiguration() {
        return this.crtRetryConfiguration;
    }

    /** @return the value of {@code max_concurrency}, or {@code null} when not set */
    public Integer getMaxConcurrency() {
        return this.maxConcurrency;
    }

    /** @return the value of {@code target_throughput_in_gbps}, or {@code null} when not set */
    public Double getTargetThroughputInGbps() {
        return this.targetThroughputInGbps;
    }

    /** @return the value of {@code max_native_memory}, or {@code null} when not set */
    public Long getMaxNativeMemoryInBytes() {
        return this.maxNativeMemoryInBytes;
    }

    /**
     * Maps the client properties onto the CRT retry, multipart, HTTP and proxy settings.
     * The connection health configuration is always applied, using the default
     * socket timeout when {@code socket_timeout} is not specified.
     */
    private void setAsyncConfiguration(Properties props) {
        if( props.containsKey("max_error_retry")) {
            final String raw = props.getProperty("max_error_retry");
            log.trace("AWS client config - max_error_retry: {}", raw);
            final int retries = Integer.parseInt(raw);
            this.crtRetryConfiguration = S3CrtRetryConfiguration.builder().numRetries(retries).build();
        }
        if( props.containsKey("max_concurrency")) {
            final String raw = props.getProperty("max_concurrency");
            log.trace("AWS client config - max_concurrency: {}", raw);
            this.maxConcurrency = Integer.parseInt(raw);
        }
        if( props.containsKey("target_throughput_in_gbps")) {
            final String raw = props.getProperty("target_throughput_in_gbps");
            log.trace("AWS client config - target_throughput_in_gbps: {}", raw);
            this.targetThroughputInGbps = Double.parseDouble(raw);
        }
        if( props.containsKey("max_native_memory")) {
            final String raw = props.getProperty("max_native_memory");
            log.trace("AWS client config - max_native_memory: {}", raw);
            this.maxNativeMemoryInBytes = Long.parseLong(raw);
        }
        if( props.containsKey("minimum_part_size")) {
            final String raw = props.getProperty("minimum_part_size");
            log.trace("AWS client config - minimum_part_size: {}", raw);
            final MultipartConfiguration.Builder multipart = multipartBuilder();
            multipart.minimumPartSizeInBytes(Long.parseLong(raw));
        }
        if( props.containsKey("multipart_threshold")) {
            final String raw = props.getProperty("multipart_threshold");
            log.trace("AWS client config - multipart_threshold: {}", raw);
            final MultipartConfiguration.Builder multipart = multipartBuilder();
            multipart.thresholdInBytes(Long.parseLong(raw));
        }
        if( props.containsKey("connection_timeout") ) {
            final String raw = props.getProperty("connection_timeout");
            log.trace("AWS client config - connection_timeout: {}", raw);
            final S3CrtHttpConfiguration.Builder http = crtHttpConfiguration();
            http.connectionTimeout(Duration.ofMillis(Long.parseLong(raw)));
        }

        // The socket timeout is expressed through the CRT connection health settings
        long socketTimeoutMs = DEFAULT_SOCKET_TIMEOUT_MS;
        if( props.containsKey("socket_timeout") ) {
            socketTimeoutMs = Long.parseLong(props.getProperty("socket_timeout"));
        }
        log.trace("AWS client config - socket_timeout: {} (using CRT health configuration with minimum throughput 1bps)", socketTimeoutMs);
        final S3CrtHttpConfiguration.Builder http = crtHttpConfiguration();
        final S3CrtConnectionHealthConfiguration health = S3CrtConnectionHealthConfiguration.builder()
            .minimumThroughputInBps(1L)
            .minimumThroughputTimeout(Duration.ofMillis(socketTimeoutMs))
            .build();
        http.connectionHealthConfiguration(health);

        if( !props.containsKey("proxy_host") )
            return;

        final String host = props.getProperty("proxy_host");
        final S3CrtProxyConfiguration.Builder proxy = S3CrtProxyConfiguration.builder();
        log.trace("AWS client config - proxy host {}", host);
        proxy.host(host);
        if (props.containsKey("proxy_port")) {
            final int port = Integer.parseInt(props.getProperty("proxy_port"));
            proxy.port(port);
        }
        if (props.containsKey("proxy_username")) {
            proxy.username(props.getProperty("proxy_username"));
        }
        if (props.containsKey("proxy_password")) {
            proxy.password(props.getProperty("proxy_password"));
        }
        if (props.containsKey("proxy_scheme")) {
            proxy.scheme(props.getProperty("proxy_scheme"));
        }
        // These options have no counterpart in the CRT proxy configuration
        if (props.containsKey("proxy_domain")) {
            log.warn("AWS client config 'proxy_domain' doesn't exist in AWS SDK V2 Async Client");
        }
        if (props.containsKey("proxy_workstation")) {
            log.warn("AWS client config 'proxy_workstation' doesn't exist in AWS SDK V2 Async Client");
        }
        crtHttpConfiguration().proxyConfiguration(proxy.build());
    }
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.signer.Aws4Signer;
import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
import software.amazon.awssdk.core.signer.Signer;
import software.amazon.awssdk.retries.StandardRetryStrategy;
import software.amazon.awssdk.utils.ClassLoaderHelper;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Properties;
/**
* Class to convert Amazon properties in S3 client override configuration
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
public class S3ClientConfiguration {

    protected static final Logger log = LoggerFactory.getLogger(S3ClientConfiguration.class);

    // Created lazily: stays null until a supported override is specified
    private ClientOverrideConfiguration.Builder cocBuilder;

    protected S3ClientConfiguration() {}

    /** Returns the client override builder, creating it on first use. */
    private ClientOverrideConfiguration.Builder cocBuilder() {
        if( this.cocBuilder != null )
            return this.cocBuilder;
        this.cocBuilder = ClientOverrideConfiguration.builder();
        return this.cocBuilder;
    }

    /** @return the client override configuration, or {@code null} when no override has been set */
    public ClientOverrideConfiguration getClientOverrideConfiguration() {
        return cocBuilder != null ? cocBuilder.build() : null;
    }

    /**
     * Applies the supported client override properties and warns about the ones
     * that are not supported by the AWS SDK V2.
     *
     * @param props the client properties; nothing is done when {@code null}
     */
    protected final void setClientOverrideConfiguration(Properties props) {
        if( props != null ) {
            applyRetryStrategy(props);
            warnUnsupportedOptions(props);
        }
    }

    /** Configures the retry strategy with {@code max_error_retry + 1} attempts. */
    private void applyRetryStrategy(Properties props) {
        if( !props.containsKey("max_error_retry") )
            return;
        final String raw = props.getProperty("max_error_retry");
        log.trace("AWS client config - max_error_retry: {}", raw);
        final ClientOverrideConfiguration.Builder overrides = cocBuilder();
        // the first attempt is not a retry, hence the +1
        final int attempts = Integer.parseInt(raw) + 1;
        overrides.retryStrategy(StandardRetryStrategy.builder().maxAttempts(attempts).build());
    }

    /** Logs a warning for each specified option that is ignored. */
    private void warnUnsupportedOptions(Properties props) {
        if( props.containsKey("protocol")) {
            log.warn("AWS client config 'protocol' doesn't exist in AWS SDK V2");
        }
        if ( props.containsKey("signer_override")) {
            log.warn("AWS client config 'signerOverride' is not supported in AWS SDK V2. This option will be ignored.");
        }
        final boolean hasSendHint = props.containsKey("socket_send_buffer_size_hints");
        if( hasSendHint || props.containsKey("socket_recv_buffer_size_hints") ) {
            log.warn("AWS client config - 'socket_send_buffer_size_hints' and 'socket_recv_buffer_size_hints' do not exist in AWS SDK V2" );
        }
        if( props.containsKey("user_agent")) {
            log.warn("AWS client config 'user_agent' is not supported in AWS SDK V2. This option will be ignored.");
        }
    }
}

View File

@@ -0,0 +1,233 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
public class S3MultipartOptions {

    private static final Logger log = LoggerFactory.getLogger(S3MultipartOptions.class);

    public static final int DEFAULT_CHUNK_SIZE = 100 << 20;  // 100 MiB

    public static final int DEFAULT_BUFFER_SIZE = 10485760;  // 10 MiB

    /*
     * S3 Max copy size
     * https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html
     */
    public static final long DEFAULT_MAX_COPY_SIZE = 5_000_000_000L;

    /**
     * Largest exponent used by the retry back-off; keeps the multiplier within
     * the positive int range that the back-off has always used for valid attempts
     */
    private static final int MAX_BACKOFF_EXPONENT = 30;

    /**
     * Upload chunk max size
     */
    private int chunkSize = DEFAULT_CHUNK_SIZE;

    /**
     * Maximum number of threads allowed
     */
    private int maxThreads = Runtime.getRuntime().availableProcessors() *3;

    /**
     * Buffer size used by the stream uploader
     */
    private int bufferSize = DEFAULT_BUFFER_SIZE;

    /**
     * Copy object max size
     */
    private long maxCopySize = DEFAULT_MAX_COPY_SIZE;

    /**
     * Maximum number of attempts to upload a chunk in a multiparts upload process
     */
    private int maxAttempts = 5;

    /**
     * Time (milliseconds) to wait after a failed upload to retry a chunk upload
     */
    private long retrySleep = 500;

    /**
     * Creates the options with the default values.
     */
    public S3MultipartOptions() {
    }

    /**
     * Creates the options from the given properties. A missing or non-numeric
     * property leaves the corresponding default value unchanged.
     *
     * @param props the properties holding the {@code upload_*} and {@code max_copy_size} settings
     */
    public S3MultipartOptions(Properties props) {
        setMaxThreads(props.getProperty("upload_max_threads"));
        setChunkSize(props.getProperty("upload_chunk_size"));
        setMaxAttempts(props.getProperty("upload_max_attempts"));
        setRetrySleep(props.getProperty("upload_retry_sleep"));
        setBufferSize(props.getProperty("upload_buffer_size"));
        setMaxCopySize(props.getProperty("max_copy_size"));
    }

    public int getChunkSize() {
        return chunkSize;
    }

    public int getMaxThreads() {
        return maxThreads;
    }

    public int getMaxAttempts() {
        return maxAttempts;
    }

    public long getRetrySleep() {
        return retrySleep;
    }

    public int getBufferSize() { return bufferSize; }

    public long getMaxCopySize() { return maxCopySize; }

    public S3MultipartOptions setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
        return this;
    }

    /**
     * Sets the chunk size from its string form.
     *
     * @param chunkSize the chunk size; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setChunkSize(String chunkSize) {
        if( chunkSize==null )
            return this;
        try {
            setChunkSize(Integer.parseInt(chunkSize));
        }
        catch( NumberFormatException e ) {
            log.warn("Not a valid AWS S3 multipart upload chunk size: `{}` -- Using default", chunkSize);
        }
        return this;
    }

    public S3MultipartOptions setBufferSize(int bufferSize) {
        this.bufferSize = bufferSize;
        return this;
    }

    /**
     * Sets the buffer size from its string form.
     *
     * @param bufferSize the buffer size; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setBufferSize(String bufferSize) {
        if( bufferSize==null )
            return this;
        try {
            setBufferSize(Integer.parseInt(bufferSize));
        }
        catch( NumberFormatException e ) {
            log.warn("Not a valid AWS S3 multipart upload buffer size: `{}` -- Using default", bufferSize);
        }
        return this;
    }

    /**
     * Sets the max copy size from its string form.
     *
     * @param value the max copy size; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setMaxCopySize(String value) {
        if( value==null )
            return this;
        try {
            maxCopySize = Long.parseLong(value);
        }
        catch( NumberFormatException e ) {
            // report the rejected input, not the value currently in use
            log.warn("Not a valid AWS S3 copy max size: `{}` -- Using default", value);
        }
        return this;
    }

    public S3MultipartOptions setMaxThreads(int maxThreads) {
        this.maxThreads = maxThreads;
        return this;
    }

    /**
     * Sets the max number of threads from its string form.
     *
     * @param maxThreads the max threads; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setMaxThreads(String maxThreads) {
        if( maxThreads==null )
            return this;
        try {
            setMaxThreads(Integer.parseInt(maxThreads));
        }
        catch( NumberFormatException e ) {
            log.warn("Not a valid AWS S3 multipart upload max threads: `{}` -- Using default", maxThreads);
        }
        return this;
    }

    public S3MultipartOptions setMaxAttempts(int maxAttempts) {
        this.maxAttempts = maxAttempts;
        return this;
    }

    /**
     * Sets the max number of attempts from its string form.
     *
     * @param maxAttempts the max attempts; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setMaxAttempts(String maxAttempts) {
        if( maxAttempts == null )
            return this;
        try {
            this.maxAttempts = Integer.parseInt(maxAttempts);
        }
        catch(NumberFormatException e ) {
            log.warn("Not a valid AWS S3 multipart upload max attempts value: `{}` -- Using default", maxAttempts);
        }
        return this;
    }

    public S3MultipartOptions setRetrySleep( long retrySleep ) {
        this.retrySleep = retrySleep;
        return this;
    }

    /**
     * Sets the retry sleep time from its string form.
     *
     * @param retrySleep the sleep time in milliseconds; {@code null} or a non-numeric value keeps the current one
     * @return this object
     */
    public S3MultipartOptions setRetrySleep( String retrySleep ) {
        if( retrySleep == null )
            return this;
        try {
            this.retrySleep = Long.parseLong(retrySleep);
        }
        catch (NumberFormatException e ) {
            log.warn("Not a valid AWS S3 multipart upload retry sleep value: `{}` -- Using default", retrySleep);
        }
        return this;
    }

    /**
     * Computes the exponential back-off delay for the given attempt:
     * {@code retrySleep * 2^(attempt-1)}.
     *
     * @param attempt the attempt number, starting from 1; values lower than 1 are
     *                handled as the first attempt and the exponent is capped so that
     *                the multiplier never wraps around to a negative or tiny value
     * @return the time to sleep in milliseconds
     */
    public long getRetrySleepWithAttempt( int attempt ) {
        final int exponent = attempt <= 1 ? 0 : Math.min(attempt - 1, MAX_BACKOFF_EXPONENT);
        return retrySleep * ( 1L << exponent );
    }

    @Override
    public String toString() {
        return "chunkSize=" + chunkSize +
                "; maxThreads=" + maxThreads +
                "; maxAttempts=" + maxAttempts +
                "; retrySleep=" + retrySleep;
    }
}

View File

@@ -0,0 +1,48 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
/**
* Class to mimic Old V1 S3ObjectId
*/
public class S3ObjectId {

    private final String bucket;
    private final String key;
    private final String versionId;

    /**
     * Identifies an object without referring to a specific version.
     *
     * @param bucket the bucket name
     * @param key the object key
     */
    public S3ObjectId(String bucket, String key) {
        this(bucket, key, null);
    }

    /**
     * Identifies a specific version of an object.
     *
     * @param bucket the bucket name
     * @param key the object key
     * @param versionId the object version id, possibly {@code null}
     */
    public S3ObjectId(String bucket, String key, String versionId) {
        this.bucket = bucket;
        this.key = key;
        this.versionId = versionId;
    }

    /** @return the bucket name */
    public String bucket() {
        return this.bucket;
    }

    /** @return the object key */
    public String key() {
        return this.key;
    }

    /** @return the version id, or {@code null} when none was given */
    public String versionId() {
        return this.versionId;
    }
}

View File

@@ -0,0 +1,158 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.List;
import nextflow.cloud.aws.nio.S3Client;
import software.amazon.awssdk.services.s3.model.*;
import nextflow.cloud.aws.nio.S3Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Resolves an {@link S3Path} to the {@link S3Object} representing it, using a
 * small, bounded number of list requests.
 */
public class S3ObjectSummaryLookup {

    private static final Logger log = LoggerFactory.getLogger(S3ObjectSummaryLookup.class);

    /**
     * Get the {@link software.amazon.awssdk.services.s3.model.S3Object} that represent this Path or its first child if the path does not exist
     * @param s3Path {@link S3Path}
     * @return {@link software.amazon.awssdk.services.s3.model.S3Object}
     * @throws java.io.IOException if path not found, access denied or error getting the object
     */
    public S3Object lookup(S3Path s3Path) throws IOException {

        /*
         * check if the object summary has been cached
         */
        S3Object summary = s3Path.fetchObject();
        if( summary != null ) {
            return summary;
        }

        final S3Client client = s3Path.getFileSystem().getClient();

        /*
         * when `key` is an empty string retrieve the object meta-data of the bucket
         */
        if( "".equals(s3Path.getKey()) ) {
            HeadBucketResponse meta = client.getBucketMetadata(s3Path.getBucket());
            if( meta == null )
                throw new NoSuchFileException("s3://" + s3Path.getBucket());

            summary = S3Object.builder()
                    .key(s3Path.getKey())
                    .build();
            // TODO summary.setOwner(?);
            // TODO summary.setStorageClass(?);
            return summary;
        }

        S3Object item = getS3Object(s3Path, client);
        if( item != null )
            return item;

        throw new NoSuchFileException("s3://" + s3Path.getBucket() + "/" + s3Path.getKey());
    }

    /**
     * Lookup for the S3 object matching the specified path using at most two bounded
     * {@code listObjects} calls (replaces the previous unbounded pagination loop).
     *
     * @param s3Path the S3 path to look up
     * @param client the S3 client
     * @return the matching {@link S3Object}, or {@code null} if not found
     */
    private S3Object getS3Object(S3Path s3Path, S3Client client) throws IOException {
        final String key = s3Path.getKey();

        // Call 1: list up to 2 objects whose key starts with the target key.
        //
        // Why maxKeys(2) instead of paginating all results?
        // The previous implementation used an unbounded while(true) loop fetching 250 keys
        // per page. On prefixes with millions of objects this caused excessive S3 LIST API
        // calls, high latency, and potential timeouts. Two results are enough to cover
        // the common cases:
        //   - Exact file match: the key itself exists as an object (e.g. "data.txt")
        //   - Directory match: a child object (e.g. "data/file1") appears within the
        //     first 2 lexicographic results
        ListObjectsRequest request = ListObjectsRequest.builder()
                .bucket(s3Path.getBucket())
                .prefix(key)
                .maxKeys(2)
                .build();
        S3Object match = firstMatch(key, client.listObjects(request).contents());
        if( match != null )
            return match;

        // Call 2 (fallback): list 1 object with prefix "key/" to detect directories
        // that Call 1 missed.
        //
        // Why can Call 1 miss a directory?
        // S3 lists keys in lexicographic (UTF-8 byte) order, and several common characters
        // sort *before* '/' (0x2F) — notably '-' (0x2D) and '.' (0x2E).
        //
        // Example: given keys "a-a/file-3", "a.txt", and "a/file-1", S3 returns them as:
        //   a-a/file-3   ← '-' (0x2D) < '/' (0x2F)
        //   a.txt        ← '.' (0x2E) < '/' (0x2F)
        //   a/file-1     ← '/' (0x2F) — the actual directory child
        //
        // With maxKeys(2), Call 1 only sees "a-a/file-3" and "a.txt" — neither matches
        // key "a" via matchName(). The directory child "a/file-1" is pushed beyond the
        // result window by sibling keys that sort earlier.
        //
        // By searching with prefix "a/" directly, we skip all those siblings and find
        // "a/file-1", confirming that "a" is a directory.
        request = ListObjectsRequest.builder()
                .bucket(s3Path.getBucket())
                .prefix(key+'/')
                .maxKeys(1)
                .build();
        return firstMatch(key, client.listObjects(request).contents());
    }

    /**
     * Find the first listed object whose key matches the given name.
     *
     * @param fileName the key to look for
     * @param candidates the objects returned by a list request
     * @return the first matching object, or {@code null} when none matches
     */
    private S3Object firstMatch(String fileName, List<S3Object> candidates) {
        for( S3Object item : candidates ) {
            if( matchName(fileName, item) ) {
                return item;
            }
        }
        return null;
    }

    /**
     * Check whether the listed object is the given key itself or one of its
     * children, i.e. its key is {@code fileName} followed by a {@code '/'}.
     *
     * @param fileName the key to look for
     * @param summary the listed object
     * @return {@code true} when the object matches the key
     */
    private boolean matchName(String fileName, S3Object summary) {
        String foundKey = summary.key();

        // they are different names return false
        if( !foundKey.startsWith(fileName) ) {
            return false;
        }

        // when they are the same length, they are identical
        if( foundKey.length() == fileName.length() )
            return true;

        // otherwise the found key must be a child, not a sibling sharing the same prefix
        return foundKey.charAt(fileName.length()) == '/';
    }
}

View File

@@ -0,0 +1,117 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util;
import software.amazon.awssdk.http.SdkHttpClient;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.http.apache.ProxyConfiguration;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Duration;
import java.util.Properties;
/**
* Class to convert Amazon properties in S3 synchronous client configuration
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
public class S3SyncClientConfiguration extends S3ClientConfiguration{

    // Sync client should always have a connection limit
    private int maxConnections = 50;

    // Created lazily: stays null until an HTTP related property is specified
    private ApacheHttpClient.Builder httpClientBuilder;

    private S3SyncClientConfiguration(){
        super();
    }

    /**
     * Creates the synchronous client configuration from the given properties.
     *
     * @param props the client properties; when {@code null} an empty configuration is returned
     * @return the resulting configuration
     */
    public static S3SyncClientConfiguration create(Properties props) {
        final S3SyncClientConfiguration result = new S3SyncClientConfiguration();
        if( props == null )
            return result;
        result.setClientOverrideConfiguration(props);
        result.setClientHttpBuilder(props);
        return result;
    }

    /** Returns the HTTP client builder, creating it on first use. */
    private ApacheHttpClient.Builder httpClientBuilder(){
        if( this.httpClientBuilder != null )
            return this.httpClientBuilder;
        this.httpClientBuilder = ApacheHttpClient.builder();
        return this.httpClientBuilder;
    }

    /** @return the max number of connections (50 unless {@code max_connections} is specified) */
    public int getMaxConnections() {
        return this.maxConnections;
    }

    /** @return the HTTP client builder, or {@code null} when no HTTP related property was specified */
    public SdkHttpClient.Builder getHttpClientBuilder(){
        return this.httpClientBuilder;
    }

    /**
     * Maps the timeout, connection and proxy properties onto the HTTP client builder.
     * An invalid proxy URI is logged and the proxy configuration is skipped.
     */
    private void setClientHttpBuilder(Properties props) {
        if( props.containsKey("connection_timeout") ) {
            final String raw = props.getProperty("connection_timeout");
            log.trace("AWS client config - connection_timeout: {}", raw);
            final ApacheHttpClient.Builder http = httpClientBuilder();
            http.connectionTimeout(Duration.ofMillis(Long.parseLong(raw)));
        }
        if( props.containsKey("max_connections")) {
            final String raw = props.getProperty("max_connections");
            log.trace("AWS client config - max_connections: {}", raw);
            this.maxConnections = Integer.parseInt(raw);
            httpClientBuilder().maxConnections(this.maxConnections);
        }
        if( props.containsKey("socket_timeout")) {
            final String raw = props.getProperty("socket_timeout");
            log.trace("AWS client config - socket_timeout: {}", raw);
            final ApacheHttpClient.Builder http = httpClientBuilder();
            http.socketTimeout(Duration.ofMillis(Long.parseLong(raw)));
        }
        try {
            configureProxy(props);
        } catch (URISyntaxException e){
            log.warn("Exception creating AWS client config - proxy URI", e);
        }
    }

    /** Applies the proxy settings when {@code proxy_host} is specified. */
    private void configureProxy(Properties props) throws URISyntaxException {
        if( !props.containsKey("proxy_host") )
            return;
        final String host = props.getProperty("proxy_host");
        // -1 leaves the port undefined in the proxy URI
        final int port = Integer.parseInt(props.getProperty("proxy_port", "-1"));
        final String scheme = props.getProperty("proxy_scheme", "http");
        final ProxyConfiguration.Builder proxy = ProxyConfiguration.builder();
        log.trace("AWS client config - proxy {}://{}:{}", scheme, host, port);
        final URI endpoint = new URI(scheme, null, host, port, null, null, null);
        proxy.endpoint(endpoint);
        if (props.containsKey("proxy_username")) {
            proxy.username(props.getProperty("proxy_username"));
        }
        if (props.containsKey("proxy_password")) {
            proxy.password(props.getProperty("proxy_password"));
        }
        if (props.containsKey("proxy_domain")) {
            proxy.ntlmDomain(props.getProperty("proxy_domain"));
        }
        if (props.containsKey("proxy_workstation")) {
            proxy.ntlmWorkstation(props.getProperty("proxy_workstation"));
        }
        httpClientBuilder().proxyConfiguration(proxy.build());
    }
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import com.google.common.base.CaseFormat
/**
* Helper class for AWS
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsHelper {

    /**
     * Converts an ACL name into the matching {@link ObjectCannedACL} constant.
     * A name containing a hyphen is read as lower-hyphen (e.g. {@code public-read}),
     * any other name as upper-camel (e.g. {@code PublicRead}).
     *
     * @param value the ACL name
     * @return the matching constant, or {@code null} when the value is null or empty
     */
    static ObjectCannedACL parseS3Acl(String value) {
        if( !value )
            return null

        final CaseFormat sourceFormat = value.contains('-')
                ? CaseFormat.LOWER_HYPHEN
                : CaseFormat.UPPER_CAMEL
        final String constantName = sourceFormat.to(CaseFormat.UPPER_UNDERSCORE, value)
        return ObjectCannedACL.valueOf(constantName)
    }
}

View File

@@ -0,0 +1,88 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import java.nio.file.Path
import java.util.regex.Pattern
import groovy.transform.CompileStatic
/**
* Parse and merge AWS config and credentials file
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@CompileStatic
class ConfigParser {

    final private static Pattern KEY_VALUE = ~/\s*(\w+)\s*=.*/

    /** Lines collected for each section, in order of first appearance */
    final Map<String, List<String>> content = new LinkedHashMap<>()

    /**
     * Parses the content of the given file, merging it into this parser.
     *
     * @param path the config file to read
     * @return this parser
     */
    ConfigParser parseConfig(Path path) {
        final String text = path.text
        return parseConfig(text)
    }

    /**
     * Parses the given config text, merging it into this parser. A line defining
     * a key that already exists in the same section is discarded, therefore the
     * content parsed first takes precedence.
     *
     * @param text the config text
     * @return this parser
     */
    ConfigParser parseConfig(String text) {
        String section = null
        for( String line : text.readLines() ) {
            final String header = parseSection(line)
            if( header ) {
                section = header
                continue
            }
            // skip blank lines and anything found before the first section
            if( !section || !line.trim() )
                continue

            List<String> block = content.get(section)
            if( block == null ) {
                block = new ArrayList<String>()
                content.put(section, block)
            }
            final String key = findKey(line)
            if( !key || !hasKey(block, key) )
                block.add(line)
        }
        return this
    }

    /**
     * Checks whether any of the given lines defines the specified key
     */
    private boolean hasKey(List<String> lines, String key) {
        for( String existing : lines ) {
            if( findKey(existing) == key )
                return true
        }
        return false
    }

    /**
     * Extracts the section name from a {@code [name]} or {@code [profile name]} line.
     *
     * @param str the line to parse
     * @return the section name, or {@code null} when the line is not a section header
     */
    protected String parseSection(String str) {
        final String trimmed = str.trim()
        final boolean bracketed = trimmed.startsWith('[') && trimmed.endsWith(']')
        if( !bracketed )
            return null

        final String name = trimmed.substring(1, trimmed.length()-1)
        return name.startsWith('profile ')
                ? name.substring('profile '.length())
                : name
    }

    /**
     * Renders the merged content, one {@code [section]} header followed by its lines.
     *
     * @return the merged config text
     */
    String text() {
        final StringBuilder buffer = new StringBuilder()
        for( Map.Entry<String,List<String>> section : content.entrySet() ) {
            buffer.append('[').append(section.key).append(']\n')
            for( String line : section.value )
                buffer.append(line).append('\n')
        }
        return buffer.toString()
    }

    /**
     * Extracts the key from a {@code key = value} line.
     *
     * @param line the line to parse
     * @return the key name, or {@code null} when the line is not a key-value pair
     */
    protected String findKey(String line) {
        final matcher = KEY_VALUE.matcher(line)
        if( !matcher.matches() )
            return null
        return matcher.group(1)
    }
}

View File

@@ -0,0 +1,214 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import groovy.transform.CompileStatic
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.batch.AwsOptions
import nextflow.executor.BashFunLib
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
/**
 * AWS S3 helper class
 *
 * Renders the Bash functions {@code nxf_s3_upload} and {@code nxf_s3_download},
 * implemented either with the {@code aws s3} CLI or, when a s5cmd path is set,
 * with the s5cmd tool.
 */
@CompileStatic
class S3BashLib extends BashFunLib<S3BashLib> {

    // NOTE: each CLI option fragment below is either empty or ends with a trailing
    // blank, so the command templates can concatenate them without separators
    private String storageClass = 'STANDARD'
    private String storageEncryption = ''
    private String storageKmsKeyId = ''
    private String debug = ''
    private String cli = 'aws'
    private String retryMode
    private String s5cmdPath
    private String acl = ''
    private String requesterPays = ''
    private String forceGlacierTransfer = ''

    /** Set the AWS CLI executable; an empty value keeps the current one */
    S3BashLib withCliPath(String cliPath) {
        if( cliPath )
            this.cli = cliPath
        return this
    }

    /** Set the value exported as {@code AWS_RETRY_MODE}; an empty value keeps the current one */
    S3BashLib withRetryMode(String value) {
        if( value )
            retryMode = value
        return this
    }

    /** Enable or disable the {@code --debug} option of the upload commands */
    S3BashLib withDebug(Boolean value) {
        this.debug = value ? '--debug ' : ''
        return this
    }

    /** Set the {@code --storage-class} value; an empty value keeps the current one */
    S3BashLib withStorageClass(String value) {
        if( value )
            this.storageClass = value
        return this
    }

    /** Set the {@code --sse} value; an empty value keeps the current setting */
    S3BashLib withStorageEncryption(String value) {
        if( value )
            this.storageEncryption = "--sse $value "
        return this
    }

    /** Set the {@code --sse-kms-key-id} value; an empty value keeps the current setting */
    S3BashLib withStorageKmsKeyId(String value) {
        if( value )
            this.storageKmsKeyId = "--sse-kms-key-id $value "
        return this
    }

    /** Set the s5cmd executable path; a non-empty value makes {@link #render()} use the s5cmd helpers */
    S3BashLib withS5cmdPath(String value) {
        this.s5cmdPath = value
        return this
    }

    /** Set the {@code --acl} value; a null value keeps the current setting */
    S3BashLib withAcl(ObjectCannedACL value) {
        if( value )
            this.acl = "--acl $value "
        return this
    }

    /** Enable or disable the {@code --request-payer requester} option of the upload commands */
    S3BashLib withRequesterPays(Boolean value) {
        this.requesterPays = value ? "--request-payer requester " : ''
        return this
    }

    /** Enable or disable the {@code --force-glacier-transfer} option of the recursive download command */
    S3BashLib withForceGlacierTransfer(Boolean value) {
        this.forceGlacierTransfer = value ? '--force-glacier-transfer ' : ''
        return this
    }

    /**
     * @return The Bash snippet exporting the AWS CLI retry settings, or an empty
     *      string when no retry mode has been set
     */
    protected String retryEnv() {
        if( !retryMode )
            return ''
        // NOTE: template lines are kept flush-left so the rendered text is unchanged
        """
# aws cli retry config
export AWS_RETRY_MODE=${retryMode}
export AWS_MAX_ATTEMPTS=${maxTransferAttempts}
""".stripIndent().rightTrim()
    }

    /**
     * Implement S3 upload/download helper using `aws s3` CLI tool
     *
     * @return The Bash script implementing the S3 helper functions
     */
    protected String s3Lib() {
        // NOTE: template lines are kept flush-left so the rendered script is unchanged
        """
# aws helper
nxf_s3_upload() {
local name=\$1
local s3path=\$2
if [[ "\$name" == - ]]; then
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass - "\$s3path"
elif [[ -d "\$name" ]]; then
$cli s3 cp --only-show-errors --recursive ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
else
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
fi
}
nxf_s3_download() {
local source=\$1
local target=\$2
local file_name=\$(basename \$1)
local is_dir=\$($cli s3 ls \$source | grep -F "PRE \${file_name}/" -c)
if [[ \$is_dir == 1 ]]; then
$cli s3 cp --only-show-errors --recursive ${forceGlacierTransfer}"\$source" "\$target"
else
$cli s3 cp --only-show-errors "\$source" "\$target"
fi
}
""".stripIndent(true)
    }

    /**
     * Implement S3 upload/download helper using s5cmd CLI tool
     * https://github.com/peak/s5cmd
     *
     * @return The Bash script implementing the S3 helper functions
     */
    protected String s5cmdLib() {
        // the local `cli` shadows the field, so the template below runs s5cmd
        final cli = s5cmdPath
        // NOTE: template lines are kept flush-left so the rendered script is unchanged
        """
# aws helper for s5cmd
nxf_s3_upload() {
local name=\$1
local s3path=\$2
if [[ "\$name" == - ]]; then
local tmp=\$(nxf_mktemp)
cp /dev/stdin \$tmp/\$name
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass \$tmp/\$name "\$s3path"
elif [[ -d "\$name" ]]; then
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name/" "\$s3path/\$name/"
else
$cli cp ${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
fi
}
nxf_s3_download() {
local source=\$1
local target=\$2
local file_name=\$(basename \$1)
local is_dir=\$($cli ls \$source | grep -F "DIR \${file_name}/" -c)
if [[ \$is_dir == 1 ]]; then
$cli cp "\$source/*" "\$target"
else
$cli cp "\$source" "\$target"
fi
}
""".stripIndent()
    }

    /**
     * @return The parent library script followed by the s5cmd helpers when a s5cmd
     *      path is set, otherwise by the retry settings and the `aws s3` helpers
     */
    @Override
    String render() {
        return s5cmdPath
            ? super.render() + s5cmdLib()
            : super.render() + retryEnv() + s3Lib()
    }

    /** Create a library instance configured from the given options */
    static private S3BashLib lib0(AwsOptions opts, boolean includeCore) {
        new S3BashLib()
            .includeCoreFun(includeCore)
            .withMaxParallelTransfers( opts.maxParallelTransfers )
            .withDelayBetweenAttempts(opts.delayBetweenAttempts )
            .withMaxTransferAttempts( opts.maxTransferAttempts )
            .withCliPath( opts.awsCli )
            .withStorageClass(opts.storageClass )
            .withStorageEncryption( opts.storageEncryption )
            .withStorageKmsKeyId( opts.storageKmsKeyId )
            .withRetryMode( opts.retryMode )
            .withDebug( opts.debug )
            .withS5cmdPath( opts.s5cmdPath )
            .withAcl( opts.s3Acl )
            .withRequesterPays( opts.requesterPays )
            .withForceGlacierTransfer( opts.forceGlacierTransfer )
    }

    /**
     * @param opts The AWS options used to configure the script
     * @return The rendered script, with the core functions included
     */
    static String script(AwsOptions opts) {
        lib0(opts,true).render()
    }

    /**
     * @return The rendered script configured from the current global session,
     *      without the core functions
     */
    static String script() {
        final opts = new AwsOptions(Global.session as Session)
        lib0(opts,false).render()
    }
}

View File

@@ -0,0 +1,59 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import software.amazon.awssdk.auth.credentials.AwsCredentials
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
/**
 * AWS credentials provider that delegates the credentials to the
 * specified provider class and fallback to the {@link AnonymousCredentialsProvider}
 * when no credentials are available.
 *
 * Once the fallback has happened the anonymous credentials are cached and the
 * target provider is not consulted again by this instance.
 *
 * See also {@link software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain}
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@CompileStatic
class S3CredentialsProvider implements AwsCredentialsProvider {

    /** The provider the credentials are requested from */
    private final AwsCredentialsProvider target

    /** Anonymous credentials, set the first time the target provider fails */
    private volatile AwsCredentials anonymous

    S3CredentialsProvider(AwsCredentialsProvider target) {
        this.target = target
    }

    /**
     * @return The credentials resolved by the target provider, or anonymous
     *      credentials when the target provider fails to resolve them
     */
    @Override
    AwsCredentials resolveCredentials() {
        if( anonymous != null )
            return anonymous
        try {
            return target.resolveCredentials()
        }
        catch (Exception e) {
            // best-effort fallback: report the reason so that a misconfiguration can be
            // told apart from a genuine absence of credentials
            log.debug("No AWS credentials available - falling back to anonymous access -- cause: ${e.message ?: e}")
        }
        anonymous = AnonymousCredentialsProvider.create().resolveCredentials()
        return anonymous
    }
}

View File

@@ -0,0 +1,86 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import java.nio.file.Path
import nextflow.cloud.aws.nio.S3Path
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Global
import nextflow.cloud.aws.batch.AwsBatchFileCopyStrategy
import nextflow.file.FileHelper
import nextflow.file.FileSystemPathFactory
/**
 * Implements a factory strategy to parse and build S3 path URIs
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@CompileStatic
class S3PathFactory extends FileSystemPathFactory {

    /**
     * Parse a {@code s3://bucket/key} string into a {@link S3Path}.
     *
     * @param str The path string to parse
     * @return The corresponding path, or {@code null} when the string is not a
     *      {@code s3://} URI followed by a bucket name
     */
    @Override
    protected Path parseUri(String str) {
        // normalise 's3' path
        // note: the length check makes a bare `s3://` return null instead of failing
        // with an index-out-of-bounds error when reading the character after the prefix
        if( str.startsWith('s3://') && str.length()>5 && str[5]!='/' ) {
            final path = "s3:///${str.substring(5)}"
            return create(path)
        }
        return null
    }

    /**
     * @return The `aws` scope of the global configuration, or an empty map when missing
     */
    static private Map config() {
        final result = Global.config?.get('aws') as Map
        return result != null ? result : Collections.emptyMap()
    }

    /**
     * @return The path string prefixed with {@code s3:/} when the path is a
     *      {@link S3Path}, {@code null} otherwise
     */
    @Override
    protected String toUriString(Path path) {
        return path instanceof S3Path ? "s3:/$path".toString() : null
    }

    /**
     * @return The S3 Bash helper script configured from the current session
     */
    @Override
    protected String getBashLib(Path target) {
        return S3BashLib.script()
    }

    /**
     * @return The upload command for the given source when the target is a
     *      {@link S3Path}, {@code null} otherwise
     */
    @Override
    protected String getUploadCmd(String source, Path target) {
        return target instanceof S3Path
            ? AwsBatchFileCopyStrategy.uploadCmd(source,target)
            : null
    }

    /**
     * Creates a {@link S3Path} from a S3 formatted URI.
     *
     * @param path
     *      A S3 URI path e.g. s3:///BUCKET_NAME/some/data.
     *      NOTE it expects the s3 prefix provided with triple `/` .
     *      This is required by the underlying implementation expecting the host name in the URI to be empty
     *      and the bucket name to be the first path element
     * @return
     *      The corresponding {@link S3Path}
     * @throws IllegalArgumentException
     *      When the path is empty or does not start with the `s3:///` prefix
     */
    static S3Path create(String path) {
        if( !path ) throw new IllegalArgumentException("Missing S3 path argument")
        if( !path.startsWith('s3:///') ) throw new IllegalArgumentException("S3 path must start with s3:/// prefix -- offending value '$path'")
        // note: this URI constructor parse the path parameter and extract the `scheme` and `authority` components
        final uri = new URI(null,null, path,null,null)
        return (S3Path)FileHelper.getOrCreateFileSystemFor(uri,config()).provider().getPath(uri)
    }
}

View File

@@ -0,0 +1,62 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.Serializer
import com.esotericsoftware.kryo.io.Input
import com.esotericsoftware.kryo.io.Output
import nextflow.cloud.aws.nio.S3Path
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.util.SerializerRegistrant
import org.pf4j.Extension
/**
 * Register the S3Path serializer
 *
 * A path is stored as two strings: the file system provider scheme followed by
 * the path string.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@Slf4j
@Extension
@CompileStatic
class S3PathSerializer extends Serializer<S3Path> implements SerializerRegistrant {

    /**
     * Add this serializer class to the given registry under the {@link S3Path} type.
     */
    @Override
    void register(Map<Class, Object> serializers) {
        serializers.put(S3Path, S3PathSerializer)
    }

    /**
     * Write the scheme and the string form of the given path to the output.
     */
    @Override
    void write(Kryo kryo, Output output, S3Path target) {
        final scheme = target.fileSystem.provider().scheme
        final path = target.toString()
        log.trace "S3Path serialization > scheme: $scheme; path: $path"
        // the scheme goes first, `read` consumes the values in the same order
        output.writeString(scheme)
        output.writeString(path)
    }

    /**
     * Read back a path written by {@link #write}.
     *
     * @throws IllegalStateException when the stored scheme is not {@code s3}
     */
    @Override
    S3Path read(Kryo kryo, Input input, Class<S3Path> type) {
        // both values are consumed before validating the scheme
        final scheme = input.readString()
        final path = input.readString()
        final supported = scheme == 's3'
        if( !supported )
            throw new IllegalStateException("Unexpected scheme for S3 path -- offending value '$scheme'")
        log.trace "S3Path de-serialization > scheme: $scheme; path: $path"
        final String uri = "s3://${path}"
        return (S3Path) S3PathFactory.create(uri)
    }
}

View File

@@ -0,0 +1,42 @@
#
# Copyright 2013-2026, Seqera Labs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# The MIT License (MIT)
#
# Copyright (c) 2014 Javier Arnaiz @arnaix
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# if not present, FileSystems.newFileSystem throws ProviderNotFoundException
nextflow.cloud.aws.nio.S3FileSystemProvider

View File

@@ -0,0 +1,38 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow
import spock.lang.Specification
import static test.ScriptHelper.runDataflow
/**
 * Checks that a channel can be created from a S3 path.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3ChannelTest extends Specification {
// creating a channel from a `s3://` path inside a dataflow run must not throw
def testFromPathS3() {
when:
runDataflow {
Channel.fromPath('s3://bucket/some/data.txt')
}
then:
noExceptionThrown()
}
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow
import java.nio.file.Paths
import spock.lang.IgnoreIf
import spock.lang.Requires
import spock.lang.Specification
/**
 * Checks how `Nextflow.file` and `Nextflow.files` resolve S3 paths.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3NextflowTest extends Specification {
// a `s3://bucket/key` string must equal the path built from the `s3:///bucket/key` URI
def 'should return s3 uris'() {
expect:
Nextflow.file('s3://foo/data/file.log') == Paths.get(new URI('s3:///foo/data/file.log'))
}
// with NXF_FILE_ROOT set to a S3 location, a relative name is resolved against it
// while a full S3 path is left as it is
def 'should resolve rel paths against env base' () {
given:
SysEnv.push(NXF_FILE_ROOT: 's3://some/base/dir')
expect:
Nextflow.file( 's3://abs/path/file.txt' ) == Paths.get(new URI('s3:///abs/path/file.txt'))
and:
Nextflow.file( 'file.txt' ) == Paths.get(new URI('s3:///some/base/dir/file.txt'))
cleanup:
// drop the environment pushed in the `given` block
SysEnv.pop()
}
// skipped when NXF_SMOKE is set; runs only when both AWS_S3FS_* variables are defined
// NOTE(review): the expected count depends on the current content of the bucket
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should resolve list of files' () {
when:
def result = Nextflow.files('s3://ngi-igenomes/*')
then:
result.size() == 3
}
// skipped when NXF_SMOKE is set; runs only when both AWS_S3FS_* variables are defined
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should check s3 bucket exists files' () {
when:
def result = Nextflow.file('s3://ngi-igenomes/')
then:
result.exists() == true
result.isDirectory() == true
}
}

View File

@@ -0,0 +1,60 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow
import java.nio.file.Path
import nextflow.file.FileHelper
import spock.lang.Specification
import spock.lang.Unroll
/**
 * Checks `Session.cloudCachePath` with S3 and local work directories.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class S3SessionTest extends Specification {
// table rows: no config gives null; an enabled config with no `path` gives the S3
// work dir; an explicit `path` is returned even when the work dir is local
@Unroll
def 'should get cloud cache path' () {
given:
def session = Spy(Session)
expect:
session.cloudCachePath(CONFIG, FileHelper.asPath(WORKDIR)) == EXPECTED
where:
CONFIG | WORKDIR | EXPECTED
null | '/foo' | null
[enabled:true] | 's3://foo/work' | FileHelper.asPath('s3://foo/work')
[enabled:true, path:'s3://this/that'] | '/foo' | FileHelper.asPath('s3://this/that')
}
// an enabled config with no `path` and a local work dir must be rejected
def 'should error with non-cloud bucket' () {
given:
def session = Spy(Session)
when:
session.cloudCachePath([enabled:true], Path.of('/foo/dir'))
then:
def e = thrown(IllegalArgumentException)
e.message == "Storage path not supported by Cloud-cache - offending value: '/foo/dir'"
}
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws
import nextflow.SysEnv
import nextflow.cloud.aws.config.AwsConfig
import spock.lang.Specification
/**
 * Checks the settings exposed by {@code AwsClientFactory}.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsClientFactoryTest extends Specification {

    // explicit config values are exposed as they are when the environment is empty
    def 'should create factory' () {
        given:
        SysEnv.push([:])
        and:
        def config = new AwsConfig(accessKey: 'foo', secretKey: 'bar', region:'xyz', profile:'my-profile')

        when:
        def clientFactory = new AwsClientFactory(config)

        then:
        clientFactory.accessKey() == 'foo'
        clientFactory.secretKey() == 'bar'
        clientFactory.region() == 'xyz'
        clientFactory.profile() == 'my-profile'

        cleanup:
        SysEnv.pop()
    }

    // with an empty config, region and profile come from the environment and no keys are set
    def 'should create factory using environment' () {
        given:
        SysEnv.push([AWS_REGION:'eu-foo-1', AWS_PROFILE: 'profile-x'])
        and:
        def config = new AwsConfig([:])

        when:
        def clientFactory = new AwsClientFactory(config)

        then:
        clientFactory.accessKey() == null
        clientFactory.secretKey() == null
        clientFactory.region() == 'eu-foo-1'
        clientFactory.profile() == 'profile-x'

        cleanup:
        SysEnv.pop()
    }
}

View File

@@ -0,0 +1,383 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.Paths
import nextflow.processor.TaskBean
import spock.lang.Specification
import test.TestHelper
class AwsBatchFileCopyStrategyTest extends Specification {
// Checks the commands rendered for touch, copy, exit-file, stage-in and unstage.
// Plain paths are used as input; the expected commands show them rendered with a `s3:/` prefix.
def 'should strip out file/folder name from target S3 path' () {
given:
def OUTPUTS = ["outputs_*","final_folder"]
def TARGET = Paths.get('/data/results')
def FILE = Paths.get('/some/data/nobel_prize_results.gz')
def EXIT = Paths.get('/some/path/.exitcode')
def RUN = Paths.get('/some/data/.command.run')
def copy = new AwsBatchFileCopyStrategy(Mock(TaskBean), new AwsOptions())
expect:
copy.touchFile(RUN) == "echo start | nxf_s3_upload - s3://some/data/.command.run"
// the file name is dropped from the upload target, only the parent folder is used
copy.copyFile("nobel_prize_results.gz",Paths.get("/some/data/nobel_prize_results.gz")) == "nxf_s3_upload nobel_prize_results.gz s3://some/data"
copy.exitFile(EXIT) == "| nxf_s3_upload - s3://some/path/.exitcode || true"
copy.stageInputFile(FILE, 'foo.txt') == """
downloads+=("nxf_s3_download s3://some/data/nobel_prize_results.gz foo.txt")
"""
.stripIndent().trim()
copy.getUnstageOutputFilesScript(OUTPUTS,TARGET) == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d outputs_* final_folder" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://data/results")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''
.stripIndent().leftTrim()
}
// Checks the unstage script for a plain name, glob patterns and names
// holding characters that need escaping.
def 'should return unstage script' () {
given:
def copy = new AwsBatchFileCopyStrategy(Mock(TaskBean), new AwsOptions())
def target = Paths.get('/foo/bar')
// plain file name
when:
def script = copy.getUnstageOutputFilesScript(['file.txt'],target)
then:
script.trim() == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d file.txt" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''
.stripIndent().trim()
// star glob: the pattern is expected unescaped in the `ls` command
when:
script = copy.getUnstageOutputFilesScript(['file-*.txt'],target)
then:
script.trim() == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d file-*.txt" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''
.stripIndent().trim()
// bracket glob: the pattern is expected unescaped in the `ls` command
when:
script = copy.getUnstageOutputFilesScript(['file-[a,b].txt'],target)
then:
script.trim() == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d file-[a,b].txt" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''
.stripIndent().trim()
// parentheses and blanks are expected backslash-escaped in the `ls` command
when:
script = copy.getUnstageOutputFilesScript(['file-01(A).txt', 'f o o.txt'],target)
then:
script.trim() == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d file-01\\(A\\).txt f\\ o\\ o.txt" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://foo/bar")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''
.stripIndent().trim()
}
// Checks the full script returned by `getBeforeStartScript` for two sets of options.
// The `1 *` interactions both stub the option getters and require exactly one call of each.
def 'should check the beforeScript' () {
given:
def bean = Mock(TaskBean)
def opts = Mock(AwsOptions)
AwsBatchFileCopyStrategy copy = Spy(AwsBatchFileCopyStrategy, constructorArgs: [bean, opts])
// case 1: `aws` cli, no storage class and no encryption;
// the expected script shows the STANDARD storage class and no `--sse` option
when:
def script = copy.getBeforeStartScript()
then:
1 * opts.getAwsCli() >> 'aws'
1 * opts.getStorageClass() >> null
1 * opts.getStorageEncryption() >> null
script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
// case 2: custom cli path, storage class and encryption;
// the expected script shows all three in the upload commands
when:
script = copy.getBeforeStartScript()
then:
1 * opts.getAwsCli() >> '/foo/aws'
1 * opts.getStorageClass() >> 'STANDARD_IA'
1 * opts.getStorageEncryption() >> 'AES256'
script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA - "$s3path"
elif [[ -d "$name" ]]; then
/foo/aws s3 cp --only-show-errors --recursive --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
else
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/foo/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
/foo/aws s3 cp --only-show-errors --recursive "$source" "$target"
else
/foo/aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
// Checks the environment script returned by `getEnvScript` with and without a
// remote bin directory, and with different cli paths.
def 'should return env variables' () {
given:
def ENV = [FOO: 'hola', BAR:'world', PATH:'xxx']
def bean = Mock(TaskBean)
def opts = Mock(AwsOptions)
AwsBatchFileCopyStrategy copy = Spy(AwsBatchFileCopyStrategy, constructorArgs: [bean, opts])
// no remote bin dir: only the variables are exported
when:
def script = copy.getEnvScript(ENV,false)
then:
// note: PATH is always removed
opts.getRemoteBinDir() >> null
opts.getCliPath() >> null
script == '''
export FOO="hola"
export BAR="world"
'''.stripIndent().leftTrim()
// remote bin dir set: the script first copies it to `nextflow-bin` and prepends that to PATH
when:
script = copy.getEnvScript(ENV,false)
then:
opts.getRemoteBinDir() >> '/foo/bar'
opts.getAwsCli() >> 'aws'
script == '''
aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
chmod +x $PWD/nextflow-bin/* || true
export PATH=$PWD/nextflow-bin:$PATH
export FOO="hola"
export BAR="world"
'''.stripIndent().leftTrim()
// custom cli path: it is used for the copy command
when:
script = copy.getEnvScript(ENV,false)
then:
opts.getAwsCli() >> '/conda/bin/aws'
opts.getRemoteBinDir() >> '/foo/bar'
script == '''
/conda/bin/aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
chmod +x $PWD/nextflow-bin/* || true
export PATH=$PWD/nextflow-bin:$PATH
export FOO="hola"
export BAR="world"
'''.stripIndent().leftTrim()
// region set: the expected script is the same as in the previous case
when:
script = copy.getEnvScript(ENV,false)
then:
opts.getAwsCli() >> '/conda/bin/aws'
opts.getRemoteBinDir() >> '/foo/bar'
opts.getRegion() >> 'eu-west-1'
script == '''
/conda/bin/aws s3 cp --recursive --only-show-errors s3://foo/bar $PWD/nextflow-bin
chmod +x $PWD/nextflow-bin/* || true
export PATH=$PWD/nextflow-bin:$PATH
export FOO="hola"
export BAR="world"
'''.stripIndent().leftTrim()
}
// Checks the download command registered for staging an input file
def 'should return stage input input file'() {
    given:
    def strategy = new AwsBatchFileCopyStrategy(Mock(TaskBean), Mock(AwsOptions))
    and:
    def file = TestHelper.createInMemTempFile('foo.txt')

    when:
    def command = strategy.stageInputFile(file, 'bar.txt')

    then:
    // the source path is rendered with the `s3:/` prefix
    command == "downloads+=(\"nxf_s3_download s3:/$file bar.txt\")" as String
}
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import nextflow.cloud.types.PriceModel
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.ec2.model.Instance
import software.amazon.awssdk.services.ec2.model.InstanceType
import software.amazon.awssdk.services.ec2.model.InstanceLifecycleType
import spock.lang.Specification
import spock.lang.Unroll
/**
 * Tests for AwsBatchHelper
 *
 * @author Rob Syme <rob.syme@seqera.io>
 */
class AwsBatchHelperTest extends Specification {
// only the SPOT lifecycle maps to the spot price model; the other rows map to standard
@Unroll
def 'should detect spot instance pricing model'() {
given:
def helper = new AwsBatchHelper(Mock(BatchClient), null)
def instance = Instance.builder()
.instanceLifecycle(LIFECYCLE)
.build()
when:
def result = helper.getPrice(instance)
then:
result == EXPECTED
where:
LIFECYCLE | EXPECTED
InstanceLifecycleType.SPOT | PriceModel.spot
InstanceLifecycleType.SCHEDULED | PriceModel.standard
null | PriceModel.standard // on-demand instances return null
}
// the instance type string must be returned as it was given
def 'should preserve raw aws instance type values'() {
given:
def helper = new AwsBatchHelper(Mock(BatchClient), null)
expect:
helper.getInstanceType(INSTANCE) == TYPE
where:
TYPE | _
'm4.large' | _
'r8id.xlarge' | _
and:
// NOTE(review): `instanceType` is set twice, first from the enum and then from the raw
// string; presumably the second call overrides the first - confirm the first is needed
INSTANCE = Instance.builder().instanceType(InstanceType.fromValue(TYPE)).instanceType(TYPE).build()
}
}

View File

@@ -0,0 +1,88 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.batch.model.DescribeJobDefinitionsRequest
import software.amazon.awssdk.services.batch.model.DescribeJobDefinitionsResponse
import software.amazon.awssdk.services.batch.model.DescribeJobsRequest
import software.amazon.awssdk.services.batch.model.DescribeJobsResponse
import nextflow.util.ThrottlingExecutor
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsBatchProxyTest extends Specification {

    /*
     * Reading the wrapped client, either as a property or through the getter,
     * returns the original client and never touches the executor.
     */
    def 'should get client instance' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)

        when:
        def viaProperty = new AwsBatchProxy(batchClient, throttler).client
        then:
        0 * throttler._
        viaProperty == batchClient

        when:
        def viaGetter = new AwsBatchProxy(batchClient, throttler).getClient()
        then:
        0 * throttler._
        viaGetter == batchClient
    }

    /*
     * `describeJobDefinitions` is forwarded to the executor exactly once with
     * a priority byte of 0, and the executor's answer is returned as-is.
     */
    def 'should invoke executor with normal priority' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)
        def request = DescribeJobDefinitionsRequest.builder().build() as DescribeJobDefinitionsRequest
        def response = DescribeJobDefinitionsResponse.builder().build()
        def normalPriority = 0 as byte

        when:
        def outcome = new AwsBatchProxy(batchClient, throttler).describeJobDefinitions(request)

        then:
        1 * throttler.doInvoke1(batchClient, 'describeJobDefinitions', [request] as Object[], normalPriority) >> response
        outcome == response
    }

    /*
     * `describeJobs` is forwarded to the executor exactly once with a priority
     * byte of 10, and the executor's answer is returned as-is.
     */
    def 'should invoke executor with higher priority' () {
        given:
        def batchClient = Mock(BatchClient)
        def throttler = Mock(ThrottlingExecutor)
        def request = DescribeJobsRequest.builder().build() as DescribeJobsRequest
        def response = DescribeJobsResponse.builder().build()
        def higherPriority = 10 as byte

        when:
        def outcome = new AwsBatchProxy(batchClient, throttler).describeJobs(request)

        then:
        1 * throttler.doInvoke1(batchClient, 'describeJobs', [request] as Object[], higherPriority) >> response
        outcome == response
    }
}

View File

@@ -0,0 +1,677 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.FileSystems
import java.nio.file.Files
import java.nio.file.Paths
import nextflow.Session
import nextflow.SysEnv
import nextflow.cloud.aws.config.AwsConfig
import nextflow.cloud.aws.util.S3PathFactory
import nextflow.container.DockerConfig
import nextflow.processor.TaskBean
import nextflow.util.Duration
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsBatchScriptLauncherTest extends Specification {
/*
 * Runs before every feature: instantiates a Session whose reference is not kept
 * here (presumably the constructor registers it globally -- TODO confirm).
 */
def setup() {
    new Session()
}
/*
 * Checks the binding produced for a task that has stdin input ('Ciao ciao'),
 * with the AWS CLI configured as /conda/bin/aws, region eu-west-1 and the
 * Batch retry mode set to 'built-in'. Verified entries: unstage_controls,
 * launch_cmd, unstage_outputs and helpers_script.
 */
def 'test bash wrapper with input'() {
/*
* simple bash run
*/
when:
def cfg = new AwsConfig(region: 'eu-west-1', batch: [cliPath:'/conda/bin/aws', retryMode: 'built-in'])
def opts = new AwsOptions(awsConfig: cfg)
def binding = new AwsBatchScriptLauncher([
name: 'Hello 1',
workDir: Paths.get('/work/dir'),
script: 'echo Hello world!',
environment: [FOO: 1, BAR:'any'],
input: 'Ciao ciao' ] as TaskBean, opts) .makeBinding()
then:
// .command.out and .command.err are uploaded to the work dir, tolerating failures (`|| true`)
binding.unstage_controls == '''\
nxf_s3_upload .command.out s3://work/dir || true
nxf_s3_upload .command.err s3://work/dir || true
'''.stripIndent()
// the task input is fed to the script through .command.in
binding.launch_cmd == '/bin/bash -ue .command.sh < .command.in'
// no output files were declared for this task
binding.unstage_outputs == null
// every aws invocation in the helpers is expected to use the configured CLI path plus `--region eu-west-1`;
// max_attempts=5 and timeout=10 are presumably the defaults, since this config does not set them (TODO confirm)
binding.helpers_script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=5
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/conda/bin/aws --region eu-west-1 s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors --recursive "$source" "$target"
else
/conda/bin/aws --region eu-west-1 s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
/*
 * With `remoteBinDir` set to /bucket/bin, the generated task environment is
 * expected to copy that directory from S3 into $PWD/nextflow-bin, make its
 * content executable and prepend it to PATH. The task's own PATH entry
 * ('/this:/that') does not appear in the expected output, while FOO is exported.
 */
def 'should create task environment' () {
/*
* simple bash run
*/
when:
def bucket = Paths.get('/bucket/work')
def opts = new AwsOptions(remoteBinDir: '/bucket/bin', awsConfig: new AwsConfig([:]))
def binding = new AwsBatchScriptLauncher([
name: 'Hello 1',
workDir: bucket,
targetDir: bucket,
environment: [PATH:'/this:/that', FOO: 'xxx'],
script: 'echo Hello world!' ] as TaskBean, opts) .makeBinding()
then:
binding.task_env == '''\
aws s3 cp --recursive --only-show-errors s3://bucket/bin $PWD/nextflow-bin
chmod +x $PWD/nextflow-bin/* || true
export PATH=$PWD/nextflow-bin:$PATH
export FOO="xxx"
'''.stripIndent()
}
/*
 * The cleanup command of the binding must remove the scratch directory
 * referenced by $NXF_SCRATCH and never fail (`|| true`).
 */
def 'should cleanup temp files' () {
    given:
    def workBucket = Paths.get('/bucket/work')
    def awsOpts = new AwsOptions(remoteBinDir: '/bucket/bin', awsConfig: new AwsConfig([:]))
    def taskBean = [
        name: 'Hello 1',
        workDir: workBucket,
        targetDir: workBucket,
        environment: [PATH:'/this:/that', FOO: 'xxx'],
        script: 'echo Hello world!'
    ] as TaskBean

    when:
    def launcherBinding = new AwsBatchScriptLauncher(taskBean, awsOpts).makeBinding()

    then:
    launcherBinding.cleanup_cmd == 'rm -rf $NXF_SCRATCH || true\n'
}
/*
 * Checks the binding produced for a task with stats enabled, two declared
 * output files and stdin input, using the default `aws` CLI (no cliPath or
 * region configured) and retry mode 'built-in'. Verified entries:
 * unstage_controls, stage_inputs, unstage_outputs, launch_cmd, task_env and
 * helpers_script.
 */
def 'test bash wrapper with outputs and stats'() {
/*
* simple bash run
*/
when:
def bucket = Paths.get('/bucket/work')
def opts = new AwsOptions(awsConfig: new AwsConfig(batch: [retryMode: 'built-in']))
def binding = new AwsBatchScriptLauncher([
name: 'Hello 1',
workDir: bucket,
targetDir: bucket,
statsEnabled: true,
outputFiles: ['foo.txt', 'bar.fastq'],
script: 'echo Hello world!',
input: 'Ciao ciao' ] as TaskBean, opts) .makeBinding()
then:
// with stats enabled the .command.trace file is uploaded in addition to .command.out/.command.err
binding.unstage_controls == '''\
nxf_s3_upload .command.out s3://bucket/work || true
nxf_s3_upload .command.err s3://bucket/work || true
nxf_s3_upload .command.trace s3://bucket/work || true
'''.stripIndent()
// each control file download is wrapped with nxf_cp_retry in this configuration
binding.stage_inputs == '''\
# stage input files
downloads=(true)
rm -f .command.sh
rm -f .command.run
rm -f .command.in
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.sh .command.sh")
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.run .command.run")
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.in .command.in")
nxf_parallel "${downloads[@]}"
'''.stripIndent()
// the declared output files are listed, de-duplicated and uploaded in parallel to the target dir
binding.unstage_outputs == '''
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d foo.txt bar.fastq" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://bucket/work")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''.stripIndent().leftTrim()
// with stats enabled the task is launched through the nxf_trace entry of .command.run
binding.launch_cmd == '/bin/bash .command.run nxf_trace'
// no environment and no remoteBinDir were provided
binding.task_env == ''
// helpers are expected to call the plain `aws` command, without a region flag
binding.helpers_script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=5
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
/*
 * Builds the wrapper files in a temporary work directory for a task that
 * declares a custom scratch path, and verifies that `.command.run` passes that
 * path to `nxf_mktemp` when assigning NXF_SCRATCH.
 */
def 'test bash wrapper with custom scratch'() {
    given:
    def folder = Files.createTempDirectory('test')
    // AwsConfig takes `batch` and `region` as top-level keys, as every other
    // feature of this spec does. Nesting them under an `aws` key (with the region
    // inside `batch`) leaves the CLI path and the region silently unset.
    def cfg = new AwsConfig(batch: [cliPath:'/conda/bin/aws'], region: 'eu-west-1')
    def opts = new AwsOptions(awsConfig: cfg)

    /*
     * simple bash run
     */
    when:
    def bash = new AwsBatchScriptLauncher([
        name: 'Hello 1',
        workDir: folder,
        script: 'echo Hello world!',
        scratch: '/foo/bar/tmp'
    ] as TaskBean, opts)
    bash.build()

    then:
    Files.exists(folder.resolve('.command.sh'))
    Files.exists(folder.resolve('.command.run'))
    folder.resolve('.command.run').text.contains('NXF_SCRATCH="$(set +u; nxf_mktemp /foo/bar/tmp)"')

    cleanup:
    // remove the temporary work directory even when an assertion fails
    folder?.deleteDir()
}
/*
 * Builds the wrapper files in a temporary work directory for a task whose
 * scratch is `false`, and verifies that `.command.run` assigns an empty
 * NXF_SCRATCH.
 */
def 'test should disable scratch'() {
    given:
    def tmpDir = Files.createTempDirectory('test')
    def awsOpts = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath:'/conda/bin/aws'], region: 'eu-west-1'))
    def launcher = new AwsBatchScriptLauncher([
        name: 'Hello 1',
        workDir: tmpDir,
        script: 'echo Hello world!',
        scratch: false
    ] as TaskBean, awsOpts)

    when:
    launcher.build()

    then:
    Files.exists(tmpDir.resolve('.command.sh'))
    Files.exists(tmpDir.resolve('.command.run'))
    tmpDir.resolve('.command.run').text.contains("NXF_SCRATCH=''")

    cleanup:
    tmpDir?.deleteDir()
}
/*
 * With retry mode 'built-in', maxTransferAttempts 3 and a 9 second delay
 * between attempts, the stage-in download is expected to be wrapped with
 * nxf_cp_retry, and the nxf_cp_retry helper is expected to carry
 * max_attempts=3 and timeout=9.
 */
def 'test download retry enabled'() {
/*
* simple bash run
*/
when:
def bucket = Paths.get('/bucket/work')
def cfg = new AwsConfig(batch: [maxTransferAttempts:3, delayBetweenAttempts: '9 sec' as Duration, retryMode: 'built-in'])
def opts = new AwsOptions(awsConfig: cfg)
def binding = new AwsBatchScriptLauncher([
name: 'Hello 1',
workDir: bucket,
// targetDir: bucket,
script: 'echo Hello world!',
] as TaskBean, opts) .makeBinding()
then:
// only .command.sh is staged, and its download goes through nxf_cp_retry
binding.stage_inputs == '''\
# stage input files
downloads=(true)
rm -f .command.sh
downloads+=("nxf_cp_retry nxf_s3_download s3://bucket/work/.command.sh .command.sh")
nxf_parallel "${downloads[@]}"
'''.stripIndent()
// the configured attempts/delay show up in nxf_cp_retry; no AWS_RETRY_MODE/AWS_MAX_ATTEMPTS exports are expected
binding.helpers_script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=3
local timeout=9
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
/*
 * With retry mode 'adaptive', the stage-in download is expected NOT to be
 * wrapped with nxf_cp_retry; instead the helpers script is expected to export
 * AWS_RETRY_MODE=adaptive and AWS_MAX_ATTEMPTS=3 (the configured
 * maxTransferAttempts) ahead of the aws helper functions.
 */
def 'should aws cli native retry'() {
/*
* simple bash run
*/
when:
def bucket = Paths.get('/bucket/work')
def cfg = new AwsConfig(batch: [maxTransferAttempts: 3, retryMode: 'adaptive', delayBetweenAttempts: '9 sec' as Duration])
def opts = new AwsOptions(awsConfig: cfg)
def binding = new AwsBatchScriptLauncher([
name: 'Hello 1',
workDir: bucket,
// targetDir: bucket,
script: 'echo Hello world!',
] as TaskBean, opts) .makeBinding()
then:
// the download command is emitted without the nxf_cp_retry prefix
binding.stage_inputs == '''\
# stage input files
downloads=(true)
rm -f .command.sh
downloads+=("nxf_s3_download s3://bucket/work/.command.sh .command.sh")
nxf_parallel "${downloads[@]}"
'''.stripIndent()
// nxf_cp_retry is still defined (max_attempts=3, timeout=9) and the "aws cli retry config" exports are added
binding.helpers_script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=3
local timeout=9
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws cli retry config
export AWS_RETRY_MODE=adaptive
export AWS_MAX_ATTEMPTS=3
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
/*
 * When the container config enables `fixOwnership`, the binding must carry a
 * `chown` command that targets the task work directory.
 */
def 'should include fix ownership command' () {
    given:
    def awsOpts = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath:'/conda/bin/aws'], region: 'eu-west-1'))
    def taskBean = [
        name: 'Hello 1',
        workDir: Paths.get('/work/dir'),
        script: 'echo Hello world!',
        containerConfig: new DockerConfig(fixOwnership: true),
        input: 'Ciao ciao'
    ] as TaskBean
    def launcher = new AwsBatchScriptLauncher(taskBean, awsOpts)

    when:
    def launcherBinding = launcher.makeBinding()

    then:
    launcherBinding.fix_ownership == '[ ${NXF_OWNER:=\'\'} ] && (shopt -s extglob; GLOBIGNORE=\'..\'; chown -fR --from root $NXF_OWNER /work/dir/{*,.*}) || true'
}
/*
 * With NXF_WRAPPER_STAGE_FILE_THRESHOLD pushed as '100' into SysEnv, the
 * stage-in commands for the two input files and .command.sh are expected
 * inline in `binding.stage_inputs`. Presumably the small stage script stays
 * below that threshold and is therefore not moved to a separate file (TODO
 * confirm against the launcher implementation).
 */
def 'should not create separate stage script' () {
given:
// override the environment seen by the code under test; restored in `cleanup:`
SysEnv.push([NXF_WRAPPER_STAGE_FILE_THRESHOLD: '100'])
and:
def workDir = S3PathFactory.parse('s3://my-bucket/work')
and:
def inputFiles = [
'sample_1.fq': Paths.get('/my-bucket/data/sample_1.fq'),
'sample_2.fq': Paths.get('/my-bucket/data/sample_2.fq'),
]
// expected stage-in script: downloads are emitted without the nxf_cp_retry prefix
def stageScript = '''\
# stage input files
downloads=(true)
rm -f sample_1.fq
rm -f sample_2.fq
rm -f .command.sh
downloads+=("nxf_s3_download s3://my-bucket/data/sample_1.fq sample_1.fq")
downloads+=("nxf_s3_download s3://my-bucket/data/sample_2.fq sample_2.fq")
downloads+=("nxf_s3_download s3://my-bucket/work/.command.sh .command.sh")
nxf_parallel "${downloads[@]}"
'''.stripIndent()
and:
def bean = [
workDir: workDir,
targetDir: workDir,
inputFiles: inputFiles,
script: 'echo Hello world!'
] as TaskBean
def opts = new AwsOptions()
def builder = new AwsBatchScriptLauncher(bean, opts)
when:
def binding = builder.makeBinding()
then:
binding.stage_inputs == stageScript
cleanup:
// undo the SysEnv.push performed in `given:`
SysEnv.pop()
}
}

View File

@@ -0,0 +1,170 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import nextflow.util.CmdLineHelper
import software.amazon.awssdk.services.batch.model.Tmpfs
import software.amazon.awssdk.services.batch.model.Ulimit
import spock.lang.Specification
/**
* @author Manuele Simi <manuele.simi@gmail.com>
*/
class AwsContainerOptionsMapperTest extends Specification {

    /*
     * `--env` and `-e` options are turned into environment entries; a bare
     * variable name yields an entry with a null value.
     */
    def 'should set env vars'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--env VAR_FOO -e VAR_FOO2=value2 --env VAR_FOO3=value3'))

        then:
        def env = props.environment
        env.size() == 3
        env.get(0).name() == 'VAR_FOO'
        env.get(0).value() == null
        env.get(1).name() == 'VAR_FOO3'
        env.get(1).value() == 'value3'
        env.get(2).name() == 'VAR_FOO2'
        env.get(2).value() == 'value2'
    }

    /* Each `--ulimit name=soft:hard` option becomes one Ulimit entry. */
    def 'should set ulimits'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--ulimit nofile=1280:2560 --ulimit nproc=16:32'))

        then:
        props.ulimits.size() == 2
        props.ulimits.get(0) == Ulimit.builder().hardLimit(2560).name('nofile').softLimit(1280).build()
        props.ulimits.get(1) == Ulimit.builder().hardLimit(32).name('nproc').softLimit(16).build()
    }

    /* `--user` sets the container user. */
    def 'should set user'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--user nf-user'))

        then:
        props.user == 'nf-user'
    }

    /* `--privileged` switches the privileged flag on. */
    def 'should set privileged'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--privileged'))

        then:
        props.privileged
    }

    /* `--read-only` switches the read-only root filesystem flag on. */
    def 'should set readonly'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--read-only'))

        then:
        props.readonlyRootFilesystem
    }

    /* A single `-e name=value` option yields one environment entry. */
    def 'should set env'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('-e x=y'))

        then:
        props.environment.get(0).name()=='x'
        props.environment.get(0).value()=='y'
    }

    /* `--tmpfs path:opts,size=N` options become Tmpfs linux parameters. */
    def 'should set tmpfs linux params'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--tmpfs /run:rw,noexec,nosuid,size=64 --tmpfs /app:ro,size=128'))

        then:
        props.linuxParameters.tmpfs().get(0) == Tmpfs.builder().containerPath('/run').size(64).mountOptions(['rw', 'noexec', 'nosuid']).build()
        props.linuxParameters.tmpfs().get(1) == Tmpfs.builder().containerPath('/app').size(128).mountOptions(['ro']).build()
    }

    /* `--memory-swap` is mapped to the max swap linux parameter. */
    def 'should set memory swap '() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--memory-swap 2048'))

        then:
        props.linuxParameters.maxSwap() == 2048
    }

    /* A unit-less `--shm-size` of 12048024 is expected to map to 11. */
    def 'should set shared memory size'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--shm-size 12048024'))

        then:
        props.linuxParameters.sharedMemorySize() == 11
    }

    /* `--shm-size 256m` is expected to map to 256. */
    def 'should set shared memory size with unit in MiB'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--shm-size 256m'))

        then:
        props.linuxParameters.sharedMemorySize() == 256
    }

    /* `--shm-size 1g` is expected to map to 1024. */
    def 'should set shared memory size with unit in GiB'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--shm-size 1g'))

        then:
        props.linuxParameters.sharedMemorySize() == 1024
    }

    /* `--memory-swappiness` is passed through unchanged. */
    def 'should set memory swappiness'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--memory-swappiness 12048024'))

        then:
        props.linuxParameters.swappiness() == 12048024
    }

    /* `--init` enables the init process linux parameter. */
    def 'should set init'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs('--init'))

        then:
        props.linuxParameters.initProcessEnabled()
    }

    /* An empty option string leaves every inspected property unset. */
    def 'should set no params'() {
        when:
        def props = AwsContainerOptionsMapper.createContainerProperties(
                CmdLineHelper.parseGnuArgs(''))

        then:
        props.linuxParameters == null
        props.ulimits == null
        props.privileged == false
        props.readonlyRootFilesystem == false
        props.user == null
    }
}

View File

@@ -0,0 +1,265 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch
import java.nio.file.Paths
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import nextflow.Session
import nextflow.cloud.aws.config.AwsConfig
import nextflow.exception.ProcessUnrecoverableException
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsOptionsTest extends Specification {

    /*
     * The CLI command defaults to `aws`, honours a custom `cliPath` and gets a
     * `--region` flag appended when a region is configured.
     */
    def 'should return aws cli' () {
        when:
        def withDefaults = new AwsOptions(awsConfig: new AwsConfig([:]))
        then:
        withDefaults.awsCli == 'aws'

        when:
        def withCliPath = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/bin/aws']))
        then:
        withCliPath.awsCli == '/foo/bin/aws'

        when:
        def withRegion = new AwsOptions(awsConfig: new AwsConfig(region: 'eu-west-1', batch: [cliPath: '/foo/bin/aws']))
        then:
        withRegion.awsCli == '/foo/bin/aws --region eu-west-1'
    }

    /*
     * `maxParallelTransfers` falls back to AwsOptions.MAX_TRANSFER and is read
     * from the session config when present.
     */
    def 'should get max connection' () {
        given:
        def session = Mock(Session) {
            getConfig() >> [aws:[batch:[maxParallelTransfers: 5]]]
        }

        when:
        def withDefaults = new AwsOptions(awsConfig: new AwsConfig([:]))
        then:
        withDefaults.maxParallelTransfers == AwsOptions.MAX_TRANSFER

        when:
        def fromSession = new AwsOptions(session)
        then:
        fromSession.maxParallelTransfers == 5
    }

    /*
     * Options built from a session expose the batch, client and region
     * settings; options built from an executor expose its remote bin dir.
     */
    def 'should get aws options' () {
        given:
        def session = Mock(Session) {
            getConfig() >> [aws:
                [
                    batch:[
                        cliPath: '/foo/bin/aws',
                        maxParallelTransfers: 5,
                        maxTransferAttempts: 3,
                        delayBetweenAttempts: '9 sec',
                        jobRole: 'aws::foo::bar',
                        volumes: '/foo,/this:/that'],
                    client: [
                        uploadStorageClass: 'STANDARD',
                        storageEncryption: 'AES256'],
                    region: 'aws-west-2'
                ]
            ]
        }
        def executor = Mock(AwsBatchExecutor)
        executor.getSession() >> session
        executor.getRemoteBinDir() >> Paths.get('/remote/bin/path')

        when:
        def fromSession = new AwsOptions(session)
        then:
        fromSession.maxParallelTransfers == 5
        fromSession.maxTransferAttempts == 3
        fromSession.delayBetweenAttempts.seconds == 9
        fromSession.storageClass == 'STANDARD'
        fromSession.storageEncryption == 'AES256'
        fromSession.region == 'aws-west-2'
        fromSession.jobRole == 'aws::foo::bar'
        fromSession.volumes == ['/foo','/this:/that']

        when:
        def fromExecutor = new AwsOptions(executor)
        then:
        fromExecutor.remoteBinDir == '/remote/bin/path'
    }

    /*
     * A KMS key id alone leaves `storageEncryption` null; an explicit
     * `storageEncryption` next to the key id is kept.
     */
    def 'should set aws kms key' () {
        when:
        def keyOnlySession = Mock(Session) {
            getConfig() >> [aws: [ client: [ storageKmsKeyId: 'my-kms-key']]]
        }
        and:
        def keyOnly = new AwsOptions(keyOnlySession)
        then:
        keyOnly.storageKmsKeyId == 'my-kms-key'
        keyOnly.storageEncryption == null

        when:
        def keyAndEncryptionSession = Mock(Session) {
            getConfig() >> [aws: [ client: [ storageKmsKeyId: 'my-kms-key', storageEncryption: 'aws:kms']]]
        }
        and:
        def keyAndEncryption = new AwsOptions(keyAndEncryptionSession)
        then:
        keyAndEncryption.storageKmsKeyId == 'my-kms-key'
        keyAndEncryption.storageEncryption == 'aws:kms'   // <-- allow explicit `storageEncryption`
    }

    /*
     * CLI path, storage class and storage encryption are read back from a
     * real Session exactly as configured.
     */
    @Unroll
    def 'should return aws options'() {
        given:
        def session = new Session([
            aws: [client: [
                uploadStorageClass: awsStorClass,
                storageEncryption : awsStorEncrypt],
                  batch: [ cliPath: awscliPath ]]
        ])

        when:
        def options = new AwsOptions(session)

        then:
        options.cliPath == awscliPath
        options.storageClass == awsStorClass
        options.storageEncryption == awsStorEncrypt

        where:
        awscliPath     | awsStorClass | awsStorEncrypt
        null           | null         | null
        '/foo/bin/aws' | 'STANDARD'   | 'AES256'
    }

    /*
     * Unrecognised storage class / encryption values are expected to read back
     * as null, and a `cliPath` such as 'bin/aws' or '/foo/aws' is rejected with
     * ProcessUnrecoverableException.
     */
    def 'should validate aws options' () {
        when:
        def options = new AwsOptions(awsConfig: new AwsConfig([:]))
        then:
        options.getCliPath() == null
        options.getStorageClass() == null
        options.getStorageEncryption() == null

        when:
        options = new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/bin/aws'], client: [storageClass: 'STANDARD', storageEncryption: 'AES256']))
        then:
        options.getCliPath() == '/foo/bin/aws'
        options.getStorageClass() == 'STANDARD'
        options.getStorageEncryption() == 'AES256'

        when:
        options = new AwsOptions(awsConfig: new AwsConfig(client:[storageClass: 'foo']))
        then:
        options.getStorageClass() == null

        when:
        options = new AwsOptions(awsConfig: new AwsConfig(client:[storageEncryption: 'abr']))
        then:
        options.getStorageEncryption() == null

        when:
        options = new AwsOptions(awsConfig: new AwsConfig(client:[storageKmsKeyId: 'arn:aws:kms:eu-west-1:1234567890:key/e97ecf28-951e-4700-bf22-1bd416ec519f']))
        then:
        options.getStorageKmsKeyId() == 'arn:aws:kms:eu-west-1:1234567890:key/e97ecf28-951e-4700-bf22-1bd416ec519f'

        when:
        new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: 'bin/aws']))
        then:
        thrown(ProcessUnrecoverableException)

        when:
        new AwsOptions(awsConfig: new AwsConfig(batch: [cliPath: '/foo/aws']))
        then:
        thrown(ProcessUnrecoverableException)
    }

    /* Adding the same volume path twice must not create a duplicate entry. */
    def 'should add a volume' () {
        given:
        def options = new AwsOptions(awsConfig: new AwsConfig([:]))

        when:
        options.addVolume(Paths.get('/some/dir'))
        then:
        options.volumes == ['/some/dir']

        when:
        options.addVolume(Paths.get('/other/dir'))
        options.addVolume(Paths.get('/other/dir'))
        then:
        options.volumes == ['/some/dir', '/other/dir']
    }

    /*
     * `cliPath` is reported as the AWS CLI path or as the s5cmd path depending
     * on the configured value and platform type, as listed in the table.
     */
    @Unroll
    def 'should get aws cli path' () {
        given:
        def session = new Session(CONFIG)

        when:
        def options = new AwsOptions(session)

        then:
        options.cliPath == S3CLI_PATH
        options.s5cmdPath == S5CMD_PATH

        where:
        CONFIG                                                                      | S3CLI_PATH     | S5CMD_PATH
        [aws:[batch:[:]]]                                                           | null           | null
        [aws:[batch:[cliPath: '/usr/bin/aws']]]                                     | '/usr/bin/aws' | null
        [aws:[batch:[cliPath: 's5cmd']]]                                            | null           | null
        [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd']]]                   | null           | 's5cmd'
        [aws:[batch:[platformType: 'fargate', cliPath: '/some/path/s5cmd']]]        | null           | '/some/path/s5cmd'
        [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd --foo']]]             | null           | 's5cmd --foo'
        [aws:[batch:[platformType: 'fargate', cliPath: '/some/path/s5cmd --foo']]]  | null           | '/some/path/s5cmd --foo'
    }

    /*
     * The S3 ACL accepts both the 'PublicRead' and the 'public-read' spelling;
     * an unknown value raises IllegalArgumentException.
     */
    def 'should parse s3 acl' ( ) {
        when:
        def options = new AwsOptions(new Session(aws:[client:[s3Acl: 'PublicRead']]))
        then:
        options.getS3Acl() == ObjectCannedACL.PUBLIC_READ

        when:
        options = new AwsOptions(new Session(aws:[client:[s3Acl: 'public-read']]))
        then:
        options.getS3Acl() == ObjectCannedACL.PUBLIC_READ

        when:
        options = new AwsOptions(new Session(aws:[client:[s3Acl: 'unknown']]))
        then:
        thrown(IllegalArgumentException)
    }
}

View File

@@ -0,0 +1,612 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch.model
import software.amazon.awssdk.services.batch.model.ContainerProperties
import software.amazon.awssdk.services.batch.model.EphemeralStorage
import software.amazon.awssdk.services.batch.model.KeyValuePair
import software.amazon.awssdk.services.batch.model.LinuxParameters
import software.amazon.awssdk.services.batch.model.LogConfiguration
import software.amazon.awssdk.services.batch.model.MountPoint
import software.amazon.awssdk.services.batch.model.NetworkConfiguration
import software.amazon.awssdk.services.batch.model.ResourceRequirement
import software.amazon.awssdk.services.batch.model.ResourceType
import software.amazon.awssdk.services.batch.model.RuntimePlatform
import software.amazon.awssdk.services.batch.model.Ulimit
import software.amazon.awssdk.services.batch.model.Volume
import spock.lang.Specification
/**
* @author Nextflow Authors
*/
class ContainerPropertiesModelTest extends Specification {
/*
 * A freshly constructed model has every reference property unset and both
 * boolean flags (privileged, readonlyRootFilesystem) false.
 */
def 'should create empty model'() {
    given:
    def emptyModel = new ContainerPropertiesModel()

    expect:
    emptyModel.image == null
    emptyModel.command == null
    emptyModel.resourceRequirements == null
    emptyModel.jobRoleArn == null
    emptyModel.executionRoleArn == null
    emptyModel.linuxParameters == null
    emptyModel.environment == null
    emptyModel.privileged == false
    emptyModel.user == null
    emptyModel.readonlyRootFilesystem == false
    emptyModel.ulimits == null
    emptyModel.logConfiguration == null
    emptyModel.mountPoints == null
    emptyModel.volumes == null
    emptyModel.networkConfiguration == null
    emptyModel.ephemeralStorage == null
    emptyModel.runtimePlatform == null
}
/* The fluent `image` setter returns the model itself and stores the value. */
def 'should set and get image'() {
    given:
    def props = new ContainerPropertiesModel()

    when:
    def returned = props.image('ubuntu:20.04')

    then:
    returned == props
    props.image == 'ubuntu:20.04'
}
/* The fluent `command` setter returns the model itself and keeps all arguments in order. */
def 'should set and get command'() {
    given:
    def props = new ContainerPropertiesModel()

    when:
    def returned = props.command('echo', 'hello', 'world')

    then:
    returned == props
    props.command == ['echo', 'hello', 'world']
    props.command.size() == 3
}
def 'should set and get resource requirements'() {
given:
def model = new ContainerPropertiesModel()
def req1 = ResourceRequirement.builder()
.type(ResourceType.VCPU)
.value('1')
.build()
def req2 = ResourceRequirement.builder()
.type(ResourceType.MEMORY)
.value('1024')
.build()
when:
def result = model.resourceRequirements(req1, req2)
then:
result == model
model.resourceRequirements.size() == 2
model.resourceRequirements[0] == req1
model.resourceRequirements[1] == req2
}
def 'should set and get job role arn'() {
given:
def model = new ContainerPropertiesModel()
def arn = 'arn:aws:iam::123456789012:role/BatchJobRole'
when:
def result = model.jobRoleArn(arn)
then:
result == model
model.jobRoleArn == arn
}
def 'should set and get execution role arn'() {
given:
def model = new ContainerPropertiesModel()
def arn = 'arn:aws:iam::123456789012:role/BatchExecutionRole'
when:
def result = model.executionRoleArn(arn)
then:
result == model
model.executionRoleArn == arn
}
def 'should set and get user'() {
given:
def model = new ContainerPropertiesModel()
when:
def result = model.user('batch-user')
then:
result == model
model.user == 'batch-user'
}
def 'should set and get readonly root filesystem'() {
given:
def model = new ContainerPropertiesModel()
when:
def result = model.readonlyRootFilesystem(true)
then:
result == model
model.readonlyRootFilesystem == true
}
def 'should set and get environment'() {
given:
def model = new ContainerPropertiesModel()
def env = [
KeyValuePair.builder().name('VAR1').value('value1').build(),
KeyValuePair.builder().name('VAR2').value('value2').build()
] as ArrayList<KeyValuePair>
when:
def result = model.environment(env)
then:
result == model
model.environment == env
model.environment.size() == 2
model.environment[0].name() == 'VAR1'
model.environment[0].value() == 'value1'
model.environment[1].name() == 'VAR2'
model.environment[1].value() == 'value2'
}
def 'should set and get linux parameters'() {
given:
def model = new ContainerPropertiesModel()
def linuxParams = LinuxParameters.builder()
.initProcessEnabled(true)
.build()
when:
def result = model.linuxParameters(linuxParams)
then:
result == model
model.linuxParameters == linuxParams
}
def 'should set and get privileged'() {
given:
def model = new ContainerPropertiesModel()
when:
def result = model.privileged(true)
then:
result == model
model.privileged == true
}
def 'should set and get ulimits'() {
given:
def model = new ContainerPropertiesModel()
def ulimits = [
Ulimit.builder().name('nofile').softLimit(1024).hardLimit(2048).build(),
Ulimit.builder().name('nproc').softLimit(16).hardLimit(32).build()
] as ArrayList<Ulimit>
when:
def result = model.ulimits(ulimits)
then:
result == model
model.ulimits == ulimits
model.ulimits.size() == 2
model.ulimits[0].name() == 'nofile'
model.ulimits[0].softLimit() == 1024
model.ulimits[0].hardLimit() == 2048
}
def 'should set and get log configuration'() {
given:
def model = new ContainerPropertiesModel()
def logConfig = LogConfiguration.builder()
.logDriver('awslogs')
.options(['awslogs-group': '/aws/batch/job'])
.build()
when:
def result = model.logConfiguration(logConfig)
then:
result == model
model.logConfiguration == logConfig
}
def 'should set and get mount points'() {
given:
def model = new ContainerPropertiesModel()
def mountPoints = [
MountPoint.builder()
.sourceVolume('tmp')
.containerPath('/tmp')
.readOnly(false)
.build()
]
when:
def result = model.mountPoints(mountPoints)
then:
result == model
model.mountPoints == mountPoints
model.mountPoints.size() == 1
model.mountPoints[0].sourceVolume() == 'tmp'
model.mountPoints[0].containerPath() == '/tmp'
model.mountPoints[0].readOnly() == false
}
def 'should set and get volumes'() {
given:
def model = new ContainerPropertiesModel()
def volumes = [
Volume.builder()
.name('tmp')
.build()
]
when:
def result = model.volumes(volumes)
then:
result == model
model.volumes == volumes
model.volumes.size() == 1
model.volumes[0].name() == 'tmp'
}
def 'should set and get network configuration'() {
given:
def model = new ContainerPropertiesModel()
def networkConfig = NetworkConfiguration.builder()
.assignPublicIp('ENABLED')
.build()
when:
def result = model.networkConfiguration(networkConfig)
then:
result == model
model.networkConfiguration == networkConfig
}
def 'should set and get ephemeral storage'() {
given:
def model = new ContainerPropertiesModel()
def ephemeralStorage = EphemeralStorage.builder()
.sizeInGiB(20)
.build()
when:
def result = model.ephemeralStorage(ephemeralStorage)
then:
result == model
model.ephemeralStorage == ephemeralStorage
}
def 'should set and get runtime platform'() {
given:
def model = new ContainerPropertiesModel()
def runtimePlatform = RuntimePlatform.builder()
.operatingSystemFamily('LINUX')
.cpuArchitecture('X86_64')
.build()
when:
def result = model.runtimePlatform(runtimePlatform)
then:
result == model
model.runtimePlatform == runtimePlatform
}
def 'should support method chaining'() {
given:
def model = new ContainerPropertiesModel()
def req = ResourceRequirement.builder()
.type(ResourceType.VCPU)
.value('1')
.build()
def env = [
KeyValuePair.builder().name('VAR1').value('value1').build()
] as ArrayList<KeyValuePair>
when:
def result = model
.image('ubuntu:20.04')
.command('echo', 'hello')
.resourceRequirements(req)
.jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
.executionRoleArn('arn:aws:iam::123456789012:role/BatchExecutionRole')
.user('batch-user')
.readonlyRootFilesystem(true)
.environment(env)
.privileged(false)
then:
result == model
model.image == 'ubuntu:20.04'
model.command == ['echo', 'hello']
model.resourceRequirements.size() == 1
model.jobRoleArn == 'arn:aws:iam::123456789012:role/BatchJobRole'
model.executionRoleArn == 'arn:aws:iam::123456789012:role/BatchExecutionRole'
model.user == 'batch-user'
model.readonlyRootFilesystem == true
model.environment.size() == 1
model.privileged == false
}
def 'should generate proper toString'() {
given:
def model = new ContainerPropertiesModel()
def req = ResourceRequirement.builder()
.type(ResourceType.VCPU)
.value('1')
.build()
when:
model.image('ubuntu:20.04')
.command('echo', 'hello')
.resourceRequirements(req)
.jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
.privileged(true)
.user('batch-user')
then:
def toString = model.toString()
toString.contains('ContainerPropertiesModel{')
toString.contains("image='ubuntu:20.04'")
toString.contains('command=[echo, hello]')
toString.contains('resourceRequirements=')
toString.contains("jobRoleArn='arn:aws:iam::123456789012:role/BatchJobRole'")
toString.contains('privileged=true')
toString.contains("user='batch-user'")
}
def 'should handle null values in toString'() {
given:
def model = new ContainerPropertiesModel()
when:
def toString = model.toString()
then:
toString.contains('ContainerPropertiesModel{')
toString.contains("image='null'")
toString.contains('command=null')
toString.contains('resourceRequirements=null')
toString.contains("jobRoleArn='null'")
toString.contains('privileged=false')
toString.contains("user='null'")
toString.contains('readonlyRootFilesystem=false')
}
def 'should handle empty collections'() {
given:
def model = new ContainerPropertiesModel()
when:
model.environment([] as ArrayList<KeyValuePair>)
.ulimits([] as ArrayList<Ulimit>)
.mountPoints([])
.volumes([])
then:
model.environment == []
model.ulimits == []
model.mountPoints == []
model.volumes == []
}
def 'should handle single command argument'() {
given:
def model = new ContainerPropertiesModel()
when:
model.command('single-command')
then:
model.command == ['single-command']
model.command.size() == 1
}
def 'should handle single resource requirement'() {
given:
def model = new ContainerPropertiesModel()
def req = ResourceRequirement.builder()
.type(ResourceType.MEMORY)
.value('512')
.build()
when:
model.resourceRequirements(req)
then:
model.resourceRequirements.size() == 1
model.resourceRequirements[0] == req
}
def 'should handle boolean values correctly'() {
given:
def model = new ContainerPropertiesModel()
when:
model.privileged(false)
.readonlyRootFilesystem(false)
then:
model.privileged == false
model.readonlyRootFilesystem == false
when:
model.privileged(true)
.readonlyRootFilesystem(true)
then:
model.privileged == true
model.readonlyRootFilesystem == true
}
def 'should convert to ContainerProperties with all fields'() {
given:
def model = new ContainerPropertiesModel()
def req = ResourceRequirement.builder()
.type(ResourceType.VCPU)
.value('1')
.build()
def env = [
KeyValuePair.builder().name('VAR1').value('value1').build()
] as ArrayList<KeyValuePair>
def ulimits = [
Ulimit.builder().name('nofile').softLimit(1024).hardLimit(2048).build()
] as ArrayList<Ulimit>
def logConfig = LogConfiguration.builder()
.logDriver('awslogs')
.build()
def mountPoints = [
MountPoint.builder()
.sourceVolume('tmp')
.containerPath('/tmp')
.build()
]
def volumes = [
Volume.builder()
.name('tmp')
.build()
]
def networkConfig = NetworkConfiguration.builder()
.assignPublicIp('ENABLED')
.build()
def ephemeralStorage = EphemeralStorage.builder()
.sizeInGiB(20)
.build()
def runtimePlatform = RuntimePlatform.builder()
.operatingSystemFamily('LINUX')
.build()
def linuxParams = LinuxParameters.builder()
.initProcessEnabled(true)
.build()
when:
model.image('ubuntu:20.04')
.command('echo', 'hello')
.resourceRequirements(req)
.jobRoleArn('arn:aws:iam::123456789012:role/BatchJobRole')
.executionRoleArn('arn:aws:iam::123456789012:role/BatchExecutionRole')
.linuxParameters(linuxParams)
.environment(env)
.privileged(true)
.user('batch-user')
.readonlyRootFilesystem(true)
.ulimits(ulimits)
.logConfiguration(logConfig)
.mountPoints(mountPoints)
.volumes(volumes)
.networkConfiguration(networkConfig)
.ephemeralStorage(ephemeralStorage)
.runtimePlatform(runtimePlatform)
def containerProperties = model.toBatchContainerProperties()
then:
containerProperties instanceof ContainerProperties
containerProperties.image() == 'ubuntu:20.04'
containerProperties.command() == ['echo', 'hello']
containerProperties.resourceRequirements().size() == 1
containerProperties.resourceRequirements()[0] == req
containerProperties.jobRoleArn() == 'arn:aws:iam::123456789012:role/BatchJobRole'
containerProperties.executionRoleArn() == 'arn:aws:iam::123456789012:role/BatchExecutionRole'
containerProperties.linuxParameters() == linuxParams
containerProperties.environment().size() == 1
containerProperties.environment()[0].name() == 'VAR1'
containerProperties.privileged() == true
containerProperties.user() == 'batch-user'
containerProperties.readonlyRootFilesystem() == true
containerProperties.ulimits().size() == 1
containerProperties.ulimits()[0].name() == 'nofile'
containerProperties.logConfiguration() == logConfig
containerProperties.mountPoints().size() == 1
containerProperties.mountPoints()[0].sourceVolume() == 'tmp'
containerProperties.volumes().size() == 1
containerProperties.volumes()[0].name() == 'tmp'
containerProperties.networkConfiguration() == networkConfig
containerProperties.ephemeralStorage() == ephemeralStorage
containerProperties.runtimePlatform() == runtimePlatform
}
def 'should convert to ContainerProperties with null fields'() {
given:
def model = new ContainerPropertiesModel()
when:
def containerProperties = model.toBatchContainerProperties()
then:
containerProperties instanceof ContainerProperties
containerProperties.image() == null
containerProperties.command() == []
containerProperties.resourceRequirements() == []
containerProperties.jobRoleArn() == null
containerProperties.executionRoleArn() == null
containerProperties.linuxParameters() == null
containerProperties.environment() == []
containerProperties.privileged() == null
containerProperties.user() == null
containerProperties.readonlyRootFilesystem() == null
containerProperties.ulimits() == []
containerProperties.logConfiguration() == null
containerProperties.mountPoints() == []
containerProperties.volumes() == []
containerProperties.networkConfiguration() == null
containerProperties.ephemeralStorage() == null
containerProperties.runtimePlatform() == null
}
def 'should convert to ContainerProperties with empty collections'() {
given:
def model = new ContainerPropertiesModel()
when:
model.environment([] as ArrayList<KeyValuePair>)
.ulimits([] as ArrayList<Ulimit>)
.mountPoints([])
.volumes([])
def containerProperties = model.toBatchContainerProperties()
then:
containerProperties instanceof ContainerProperties
containerProperties.environment() == []
containerProperties.ulimits() == []
containerProperties.mountPoints() == []
containerProperties.volumes() == []
}
}

View File

@@ -0,0 +1,351 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.batch.model
import software.amazon.awssdk.services.batch.model.JobDefinitionType
import software.amazon.awssdk.services.batch.model.PlatformCapability
import software.amazon.awssdk.services.batch.model.RegisterJobDefinitionRequest
import spock.lang.Specification
/**
 * Spock specification for {@link RegisterJobDefinitionModel}.
 *
 * Covers the initial state, the fluent setters, tag accumulation through
 * {@code addTagsEntry} and the conversion to the AWS SDK
 * {@link RegisterJobDefinitionRequest}.
 *
 * @author Nextflow Authors
 */
class RegisterJobDefinitionModelTest extends Specification {

    /** A new model reports null for every field. */
    def 'should create empty model'() {
        when:
        def jobDef = new RegisterJobDefinitionModel()

        then:
        jobDef.jobDefinitionName == null
        jobDef.type == null
        jobDef.platformCapabilities == null
        jobDef.containerProperties == null
        jobDef.parameters == null
        jobDef.tags == null
    }

    /** The name setter returns the model and the value can be read back. */
    def 'should set and get job definition name'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        def returned = jobDef.jobDefinitionName('test-job-def')

        then:
        returned == jobDef
        jobDef.jobDefinitionName == 'test-job-def'
    }

    /** The type setter returns the model and the value can be read back. */
    def 'should set and get type'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        def returned = jobDef.type(JobDefinitionType.CONTAINER)

        then:
        returned == jobDef
        jobDef.type == JobDefinitionType.CONTAINER
    }

    /** Platform capabilities are stored as given. */
    def 'should set and get platform capabilities'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def platforms = [PlatformCapability.EC2, PlatformCapability.FARGATE]

        when:
        def returned = jobDef.platformCapabilities(platforms)

        then:
        returned == jobDef
        jobDef.platformCapabilities == platforms
        jobDef.platformCapabilities.size() == 2
        jobDef.platformCapabilities.contains(PlatformCapability.EC2)
        jobDef.platformCapabilities.contains(PlatformCapability.FARGATE)
    }

    /** The container properties setter returns the model and the value can be read back. */
    def 'should set and get container properties'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()

        when:
        def returned = jobDef.containerProperties(container)

        then:
        returned == jobDef
        jobDef.containerProperties == container
    }

    /** Parameters are stored as given and can be looked up by key. */
    def 'should set and get parameters'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def paramMap = ['key1': 'value1', 'key2': 'value2']

        when:
        def returned = jobDef.parameters(paramMap)

        then:
        returned == jobDef
        jobDef.parameters == paramMap
        jobDef.parameters.size() == 2
        jobDef.parameters['key1'] == 'value1'
        jobDef.parameters['key2'] == 'value2'
    }

    /** Tags are stored as given and can be looked up by key. */
    def 'should set and get tags'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def tagMap = ['env': 'test', 'project': 'nextflow']

        when:
        def returned = jobDef.tags(tagMap)

        then:
        returned == jobDef
        jobDef.tags == tagMap
        jobDef.tags.size() == 2
        jobDef.tags['env'] == 'test'
        jobDef.tags['project'] == 'nextflow'
    }

    /** Adding a tag to a model without tags creates a LinkedHashMap holding that entry. */
    def 'should add tag entry when tags is null'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        def returned = jobDef.addTagsEntry('key1', 'value1')

        then:
        returned == jobDef
        jobDef.tags != null
        jobDef.tags.size() == 1
        jobDef.tags['key1'] == 'value1'
        jobDef.tags instanceof LinkedHashMap
    }

    /** Adding a tag to a model that already has tags keeps the existing entries. */
    def 'should add tag entry when tags already exists'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        jobDef.tags(['existing': 'tag'])

        when:
        def returned = jobDef.addTagsEntry('new', 'value')

        then:
        returned == jobDef
        jobDef.tags.size() == 2
        jobDef.tags['existing'] == 'tag'
        jobDef.tags['new'] == 'value'
    }

    /** Chained addTagsEntry calls accumulate distinct keys. */
    def 'should handle multiple tag entries'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        jobDef.addTagsEntry('key1', 'value1')
            .addTagsEntry('key2', 'value2')
            .addTagsEntry('key3', 'value3')

        then:
        jobDef.tags.size() == 3
        jobDef.tags['key1'] == 'value1'
        jobDef.tags['key2'] == 'value2'
        jobDef.tags['key3'] == 'value3'
    }

    /** Adding the same key twice keeps only the latest value. */
    def 'should handle tag entry overwrite'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        jobDef.addTagsEntry('key1', 'value1')
            .addTagsEntry('key1', 'value2')

        then:
        jobDef.tags.size() == 1
        jobDef.tags['key1'] == 'value2'
    }

    /** Setters and addTagsEntry can be chained in one expression. */
    def 'should support method chaining'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()
        def platforms = [PlatformCapability.EC2]
        def paramMap = ['param1': 'value1']
        def tagMap = ['tag1': 'value1']

        when:
        def returned = jobDef
            .jobDefinitionName('test-job')
            .type(JobDefinitionType.CONTAINER)
            .platformCapabilities(platforms)
            .containerProperties(container)
            .parameters(paramMap)
            .tags(tagMap)
            .addTagsEntry('tag2', 'value2')

        then:
        returned == jobDef
        jobDef.jobDefinitionName == 'test-job'
        jobDef.type == JobDefinitionType.CONTAINER
        jobDef.platformCapabilities == platforms
        jobDef.containerProperties == container
        jobDef.parameters == paramMap
        jobDef.tags.size() == 2
        jobDef.tags['tag1'] == 'value1'
        jobDef.tags['tag2'] == 'value2'
    }

    /** Empty list and maps are accepted and read back as empty. */
    def 'should handle empty collections'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        jobDef.platformCapabilities([])
            .parameters([:])
            .tags([:])

        then:
        jobDef.platformCapabilities == []
        jobDef.parameters == [:]
        jobDef.tags == [:]
    }

    /** A fully populated model converts to an SDK request carrying every value. */
    def 'should convert to RegisterJobDefinitionRequest with all fields'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()
        container.image('ubuntu:20.04')
        def platforms = [PlatformCapability.EC2, PlatformCapability.FARGATE]
        def paramMap = ['param1': 'value1', 'param2': 'value2']
        def tagMap = ['tag1': 'value1', 'tag2': 'value2']

        when:
        jobDef.jobDefinitionName('test-job-def')
            .type(JobDefinitionType.CONTAINER)
            .platformCapabilities(platforms)
            .containerProperties(container)
            .parameters(paramMap)
            .tags(tagMap)
        def sdkRequest = jobDef.toBatchRequest()

        then:
        sdkRequest instanceof RegisterJobDefinitionRequest
        sdkRequest.jobDefinitionName() == 'test-job-def'
        sdkRequest.type() == JobDefinitionType.CONTAINER
        sdkRequest.platformCapabilities() == platforms
        sdkRequest.containerProperties() != null
        sdkRequest.containerProperties().image() == 'ubuntu:20.04'
        sdkRequest.parameters() == paramMap
        sdkRequest.tags() == tagMap
    }

    /** An empty model converts to a request whose fields all evaluate as Groovy-false. */
    def 'should convert to RegisterJobDefinitionRequest with null fields'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()

        when:
        def sdkRequest = jobDef.toBatchRequest()

        then:
        sdkRequest instanceof RegisterJobDefinitionRequest
        !sdkRequest.jobDefinitionName()
        !sdkRequest.type()
        !sdkRequest.platformCapabilities()
        !sdkRequest.containerProperties()
        !sdkRequest.parameters()
        !sdkRequest.tags()
    }

    /** Only name, type and container set: the remaining request fields evaluate as Groovy-false. */
    def 'should convert to RegisterJobDefinitionRequest with minimal fields'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()
        container.image('nginx')

        when:
        jobDef.jobDefinitionName('minimal-job')
            .type(JobDefinitionType.CONTAINER)
            .containerProperties(container)
        def sdkRequest = jobDef.toBatchRequest()

        then:
        sdkRequest instanceof RegisterJobDefinitionRequest
        sdkRequest.jobDefinitionName() == 'minimal-job'
        sdkRequest.type() == JobDefinitionType.CONTAINER
        sdkRequest.containerProperties() != null
        sdkRequest.containerProperties().image() == 'nginx'
        !sdkRequest.platformCapabilities()
        !sdkRequest.parameters()
        !sdkRequest.tags()
    }

    /** Explicitly empty collections survive the conversion as empty collections. */
    def 'should convert to RegisterJobDefinitionRequest with empty collections'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()

        when:
        jobDef.jobDefinitionName('empty-collections-job')
            .type(JobDefinitionType.CONTAINER)
            .platformCapabilities([])
            .containerProperties(container)
            .parameters([:])
            .tags([:])
        def sdkRequest = jobDef.toBatchRequest()

        then:
        sdkRequest instanceof RegisterJobDefinitionRequest
        sdkRequest.jobDefinitionName() == 'empty-collections-job'
        sdkRequest.type() == JobDefinitionType.CONTAINER
        sdkRequest.platformCapabilities() == []
        sdkRequest.containerProperties() != null
        sdkRequest.parameters() == [:]
        sdkRequest.tags() == [:]
    }

    /** toBatchRequest can terminate a setter chain directly. */
    def 'should handle chaining with toBatchRequest'() {
        given:
        def jobDef = new RegisterJobDefinitionModel()
        def container = new ContainerPropertiesModel()
        container.image('alpine')

        when:
        def sdkRequest = jobDef
            .jobDefinitionName('chained-job')
            .type(JobDefinitionType.CONTAINER)
            .containerProperties(container)
            .addTagsEntry('env', 'test')
            .addTagsEntry('project', 'nextflow')
            .toBatchRequest()

        then:
        sdkRequest instanceof RegisterJobDefinitionRequest
        sdkRequest.jobDefinitionName() == 'chained-job'
        sdkRequest.type() == JobDefinitionType.CONTAINER
        sdkRequest.containerProperties().image() == 'alpine'
        sdkRequest.tags().size() == 2
        sdkRequest.tags()['env'] == 'test'
        sdkRequest.tags()['project'] == 'nextflow'
    }
}

View File

@@ -0,0 +1,170 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import java.nio.file.Paths
import nextflow.util.Duration
import spock.lang.Specification
/**
 * Spock specification for {@link AwsBatchConfig}: default values, explicit
 * options, volume parsing, CLI path resolution and boolean flags.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsBatchConfigTest extends Specification {

    /** An empty option map yields the class defaults and leaves optional settings unset. */
    def 'should create default config' () {
        when:
        def cfg = new AwsBatchConfig([:])

        then:
        cfg.maxParallelTransfers == AwsBatchConfig.MAX_TRANSFER
        cfg.maxTransferAttempts == AwsBatchConfig.DEFAULT_AWS_MAX_ATTEMPTS
        cfg.delayBetweenAttempts == AwsBatchConfig.DEFAULT_DELAY_BETWEEN_ATTEMPTS
        cfg.maxSpotAttempts == null
        cfg.retryMode == 'standard'

        and:
        !cfg.cliPath
        !cfg.volumes
        !cfg.jobRole
        !cfg.executionRole
        !cfg.logsGroup
        !cfg.shareIdentifier
        !cfg.isFargateMode()
        !cfg.s5cmdPath
        cfg.schedulingPriority == 0
        !cfg.terminateUnschedulableJobs
        !cfg.forceGlacierTransfer
    }

    /** Every supplied option is reflected by the matching property. */
    def 'should create config with options' () {
        given:
        def settings = [
            cliPath: '/some/bin/aws',
            maxParallelTransfers:1,
            maxTransferAttempts:2,
            delayBetweenAttempts: '3s',
            maxSpotAttempts: 4,
            volumes: '/some/path:/mnt/path,/other/path',
            jobRole: 'xyz',
            executionRole: 'some:exec:role',
            logsGroup: 'group-name-123',
            retryMode: 'legacy',
            shareIdentifier: 'id-x1',
            schedulingPriority: 100,
        ]

        when:
        def cfg = new AwsBatchConfig(settings)

        then:
        cfg.cliPath == '/some/bin/aws'
        cfg.maxParallelTransfers == 1
        cfg.maxTransferAttempts == 2
        cfg.delayBetweenAttempts == Duration.of('3sec')
        cfg.maxSpotAttempts == 4
        cfg.volumes == ['/some/path:/mnt/path', '/other/path']
        cfg.jobRole == 'xyz'
        cfg.executionRole == 'some:exec:role'
        cfg.logsGroup == 'group-name-123'
        cfg.retryMode == 'legacy'
        cfg.shareIdentifier == 'id-x1'
        cfg.schedulingPriority == 100
        !cfg.isFargateMode()
    }

    /** makeVols accepts null, a comma-separated string or a list, and drops trailing slashes. */
    def 'should parse volumes list' () {
        given:
        def cfg = new AwsBatchConfig([:])

        expect:
        cfg.makeVols(OBJ) == EXPECTED

        where:
        OBJ                 | EXPECTED
        null                | []
        'foo'               | ['foo']
        'foo, bar'          | ['foo','bar']
        '/foo/,/bar///'     | ['/foo','/bar']
        ['/this','/that']   | ['/this','/that']
        ['/foo/bar/']       | ['/foo/bar']
    }

    /** addVolume appends a path once; adding the same path again does not duplicate it. */
    def 'should add a volume' () {
        given:
        def cfg = new AwsBatchConfig([:])

        when:
        cfg.addVolume(Paths.get('/some/dir'))

        then:
        cfg.volumes == ['/some/dir']

        when:
        cfg.addVolume(Paths.get('/other/dir'))
        cfg.addVolume(Paths.get('/other/dir'))

        then:
        cfg.volumes == ['/some/dir', '/other/dir']
    }

    /** Expected cliPath / s5cmdPath / fargate mode for each combination of platformType and cliPath. */
    def 'should parse cli path' () {
        given:
        def cfg = new AwsBatchConfig(OPTS)

        expect:
        cfg.cliPath == S3_CLI_PATH
        cfg.s5cmdPath == S5_CLI_PATH
        cfg.isFargateMode() == FARGATE

        where:
        OPTS                                                    | S3_CLI_PATH       | S5_CLI_PATH           | FARGATE
        [:]                                                     | null              | null                  | false
        [cliPath: "/opt/bin/aws"]                               | '/opt/bin/aws'    | null                  | false
        [cliPath: "/s5cmd"]                                     | null              | null                  | false
        [cliPath: "/opt/s5cmd --foo"]                           | null              | null                  | false
        and:
        [platformType: 'fargate', cliPath: "/opt/bin/aws"]      | null              | 's5cmd'               | true
        [platformType: 'fargate', cliPath: "/opt/s5cmd"]        | null              | '/opt/s5cmd'          | true
        [platformType: 'fargate', cliPath: "/opt/s5cmd --foo"]  | null              | '/opt/s5cmd --foo'    | true
    }

    /** terminateUnschedulableJobs is false unless explicitly enabled. */
    def 'should parse unschedulable flag' () {
        given:
        def cfg = new AwsBatchConfig(OPTS)

        expect:
        cfg.terminateUnschedulableJobs == UNSCHEDULABLE

        where:
        OPTS                                    | UNSCHEDULABLE
        [:]                                     | false
        [terminateUnschedulableJobs: false]     | false
        [terminateUnschedulableJobs: true]      | true
    }

    /** forceGlacierTransfer is false unless explicitly enabled. */
    def 'should parse forceGlacierTransfer flag' () {
        given:
        def cfg = new AwsBatchConfig(OPTS)

        expect:
        cfg.forceGlacierTransfer == FORCE_GLACIER

        where:
        OPTS                            | FORCE_GLACIER
        [:]                             | false
        [forceGlacierTransfer: false]   | false
        [forceGlacierTransfer: true]    | true
    }
}

View File

@@ -0,0 +1,133 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import software.amazon.awssdk.regions.Region
import java.nio.file.Files
import nextflow.SysEnv
import spock.lang.Specification
import spock.lang.Unroll
/**
 * Spock specification for {@link AwsConfig}: region lookup from config,
 * environment and AWS profile file, credential properties, the default
 * error-retry setting and S3 region resolution.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
class AwsConfigTest extends Specification {

    /** The region comes from the config map first, then from AWS_DEFAULT_REGION. */
    def 'should get aws region'() {
        expect:
        AwsConfig.getAwsRegion([:], [:]) == null

        and:
        AwsConfig.getAwsRegion([:], [region:'eu-west-2']) == 'eu-west-2'

        and:
        // the config value wins over the environment
        AwsConfig.getAwsRegion([AWS_DEFAULT_REGION: 'us-central-1'], [region:'eu-west-2']) == 'eu-west-2'

        and:
        AwsConfig.getAwsRegion([AWS_DEFAULT_REGION: 'us-central-1'], [:]) == 'us-central-1'
    }

    /** The region is read from the given profile file when the environment does not provide it. */
    def 'should get aws region from aws file'() {
        given:
        def awsFile = Files.createTempFile('test','test')
        awsFile.text = '''
[default]
aws_access_key_id = aaa
aws_secret_access_key = bbbb
region = reg-something
[foo]
aws_access_key_id = xxx
aws_secret_access_key = yyy
region = reg-foo
[bar]
aws_access_key_id = xxx
aws_secret_access_key = yyy
aws_session_token = zzz
'''

        expect:
        AwsConfig.getAwsRegion0([AWS_DEFAULT_REGION: 'us-central-1'], [:], awsFile) == 'us-central-1'

        and:
        AwsConfig.getAwsRegion0([:], [:], awsFile) == 'reg-something'

        and:
        AwsConfig.getAwsRegion0([:], [profile: 'foo'], awsFile) == 'reg-foo'

        cleanup:
        awsFile?.delete()
    }

    /** Keys, profile, region and the credentials list for several env/config combinations. */
    def 'should get aws config' () {
        given:
        SysEnv.push(ENV)

        and:
        def aws = new AwsConfig(CONFIG)

        expect:
        aws.accessKey == ACCESS_KEY
        aws.secretKey == SECRET_KEY
        aws.profile == PROFILE
        aws.region == REGION
        aws.credentials == (ACCESS_KEY && SECRET_KEY ? [ACCESS_KEY, SECRET_KEY] : [])

        cleanup:
        SysEnv.pop()

        where:
        ENV                                 | CONFIG                                                    | ACCESS_KEY | SECRET_KEY | REGION        | PROFILE
        [:]                                 | [accessKey: 'a', secretKey: 'b']                          | 'a'        | 'b'        | null          | null
        [:]                                 | [accessKey: 'x', secretKey: 'y', region: 'eu-region-x']   | 'x'        | 'y'        | 'eu-region-x' | null
        [:]                                 | [accessKey: 'p', secretKey: 'q', profile: 'hola']         | 'p'        | 'q'        | null          | 'hola'
        and:
        [AWS_DEFAULT_REGION: 'eu-xyz']      | [:]                                                       | null       | null       | 'eu-xyz'      | null
        [AWS_DEFAULT_PROFILE: 'my-profile'] | [:]                                                       | null       | null       | null          | 'my-profile'
    }

    /**
     * max_error_retry defaults to '5', takes AWS_MAX_ATTEMPTS from the
     * environment when present, and an explicit value is left unchanged.
     */
    @Unroll
    def 'should add max error retry' () {
        expect:
        AwsConfig.checkDefaultErrorRetry(SOURCE, ENV) == EXPECTED

        where:
        SOURCE                          | ENV                    | EXPECTED
        null                            | null                   | [max_error_retry: '5']
        [foo: 1]                        | [:]                    | [max_error_retry: '5', foo: 1]
        [foo: 1]                        | [AWS_MAX_ATTEMPTS:'3'] | [max_error_retry: '3', foo: 1]
        [max_error_retry: '2', foo: 1]  | [:]                    | [max_error_retry: '2', foo: 1]
        [:]                             | [:]                    | [max_error_retry: '5']
    }

    /** Expected resolveS3Region() result for each combination of region and client endpoint. */
    @Unroll
    def 'should resolve S3 region' () {
        expect:
        new AwsConfig(CONFIG).resolveS3Region() == REGION

        where:
        CONFIG                                                                                  | REGION
        [:]                                                                                     | Region.US_EAST_1.id()
        [client: [endpoint: "http://custom.endpoint.com"]]                                      | Region.US_EAST_1.id()
        [region: "eu-south-1", client: [endpoint: "http://custom.endpoint.com"]]                | Region.EU_SOUTH_1.id()
        [region: "eu-south-1", client: [endpoint: "https://s3.eu-west-1.amazonaws.com"]]        | Region.EU_WEST_1.id()
        [region: "eu-south-1", client: [endpoint: "https://bucket.s3-global.amazonaws.com"]]    | Region.EU_SOUTH_1.id()
    }
}

View File

@@ -0,0 +1,205 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.config
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import nextflow.SysEnv
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsS3ConfigTest extends Specification {
// An empty option map leaves every S3 client setting unset / false
def 'should get default config' () {
    when:
    def s3 = new AwsS3Config([:])

    then:
    !s3.storageClass
    !s3.storageKmsKeyId
    !s3.storageEncryption
    !s3.debug
    !s3.s3Acl
    !s3.pathStyleAccess
    !s3.anonymous
    !s3.isCustomEndpoint()
}
// Every supplied option is reflected by the matching property;
// the ACL string is exposed as an ObjectCannedACL constant
def 'should set config' () {
    given:
    def settings = [
        debug:true,
        storageClass: 'STANDARD',
        storageKmsKeyId: 'key-1',
        storageEncryption: 'AES256',
        s3Acl: 'public-read',
        s3PathStyleAccess: true,
        anonymous: true
    ]

    when:
    def s3 = new AwsS3Config(settings)

    then:
    s3.debug
    s3.storageClass == 'STANDARD'
    s3.storageKmsKeyId == 'key-1'
    s3.storageEncryption == 'AES256'
    s3.s3Acl == ObjectCannedACL.PUBLIC_READ
    s3.pathStyleAccess
    s3.anonymous
}
// The `uploadStorageClass` option is accepted and surfaces as `storageClass`
def 'should use legacy upload storage class' () {
    given:
    def settings = [
        uploadStorageClass: 'STANDARD_IA',
    ]

    when:
    def s3 = new AwsS3Config(settings)

    then:
    s3.storageClass == 'STANDARD_IA'
}
// The endpoint is taken from the config option, falling back to AWS_S3_ENDPOINT
@Unroll
def 'should get aws s3 endpoint' () {
    given:
    SysEnv.push(ENV)

    when:
    def s3 = new AwsS3Config(CONFIG)

    then:
    s3.endpoint == EXPECTED

    cleanup:
    SysEnv.pop()

    where:
    ENV                             | CONFIG                     | EXPECTED
    [:]                             | [:]                        | null
    [AWS_S3_ENDPOINT: 'http://foo'] | [:]                        | 'http://foo'
    [:]                             | [endpoint: 'http://bar']   | 'http://bar'
    [AWS_S3_ENDPOINT: 'http://foo'] | [endpoint: 'http://bar']   | 'http://bar' // <-- config should have priority
}
// An endpoint without an http:// or https:// prefix is rejected at construction time
@Unroll
def 'should fail with invalid endpoint protocol' () {
    when:
    new AwsS3Config(CONFIG)

    then:
    def failure = thrown(IllegalArgumentException)
    failure.message == EXPECTED

    where:
    CONFIG                        | EXPECTED
    [endpoint: 'bar.com']         | "S3 endpoint must begin with http:// or https:// prefix - offending value: 'bar.com'"
    [endpoint: 'ftp://bar.com']   | "S3 endpoint must begin with http:// or https:// prefix - offending value: 'ftp://bar.com'"
}
// Client options are exported as snake_case string properties;
// max_error_retry is '5' unless maxErrorRetry is configured
def 'should get s3 legacy properties' () {
    given:
    SysEnv.push([:])

    when:
    def aws = new AwsConfig([client:[uploadMaxThreads: 5, uploadChunkSize: 1000, uploadStorageClass: 'STANDARD']])
    def legacy = aws.getS3LegacyProperties()

    then:
    legacy.upload_storage_class == 'STANDARD'
    legacy.upload_chunk_size == '1000'
    legacy.upload_max_threads == '5'
    legacy.max_error_retry == '5' // <-- default to 5

    when:
    aws = new AwsConfig([client:[uploadMaxThreads: 10, maxErrorRetry: 20, uploadStorageClass: 'ONEZONE_IA']])
    legacy = aws.getS3LegacyProperties()

    then:
    legacy.upload_storage_class == 'ONEZONE_IA'
    legacy.upload_max_threads == '10'
    legacy.max_error_retry == '20'

    cleanup:
    SysEnv.pop()
}
def 'should get maxDownloadHeapMemory' () {
given:
SysEnv.push([:])
when:
def config = new AwsConfig([client:[ maxDownloadHeapMemory: '100 MB']])
def env = config.getS3Config().getAwsClientConfig()
then:
env.max_download_heap_memory == Long.toString( 100 * 1024 * 1024)
cleanup:
SysEnv.pop()
}
@Unroll
def 'should check is custom endpoint' () {
given:
def config = new AwsS3Config(CONFIG)
expect:
config.isCustomEndpoint() == EXPECTED
where:
EXPECTED | CONFIG
false | [:]
false | [endpoint: 'https://s3.us-east-2.amazonaws.com']
true | [endpoint: 'https://foo.com']
// consider AWS china as custom ednpoint
// see https://github.com/nextflow-io/nextflow/issues/5836
true | [endpoint: 'https://xxxx.s3.cn-north-1.vpce.amazonaws.com.cn']
}
@Unroll
def 'should fail with invalid maxDownloadHeapMemory and minimumPartSize are incorrect' () {
when:
new AwsS3Config(CONFIG)
then:
def e = thrown(IllegalArgumentException)
e.message == EXPECTED
where:
CONFIG | EXPECTED
[ maxDownloadHeapMemory: '0MB' ] | "Configuration option `aws.client.maxDownloadHeapMemory` can't be 0"
[ minimumPartSize: '0MB' ] | "Configuration option `aws.client.minimumPartSize` can't be 0"
[ maxDownloadHeapMemory: '50 MB', minimumPartSize: '6 MB'] | "Configuration option `aws.client.maxDownloadHeapMemory` must be at least 10 times `aws.client.minimumPartSize`"
}
@Unroll
def 'should get region from endpoint' () {
expect:
new AwsS3Config(CONFIG).getEndpointRegion() == REGION
where:
CONFIG | REGION
[:] | null
[endpoint: "http://custom.endpoint.com"] | null
[endpoint: "https://s3.eu-west-1.amazonaws.com"] | Region.EU_WEST_1.id()
[endpoint: "https://bucket.s3-global.amazonaws.com"] | null
}
}

View File

@@ -0,0 +1,103 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.fusion
import nextflow.Global
import nextflow.SysEnv
import nextflow.fusion.FusionConfig
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsFusionEnvTest extends Specification {

    // Start every feature from an empty global config so that `aws.*` settings
    // installed by another feature cannot leak into this one.
    def setup() {
        Global.config = Collections.emptyMap()
    }

    // A scheme other than `s3` ('az' here) yields no environment at all.
    def 'should return empty env' () {
        given:
        def provider = new AwsFusionEnv()
        when:
        def env = provider.getEnvironment('az', Mock(FusionConfig))
        then:
        env == Collections.emptyMap()
    }

    // Credentials found in the environment are exported only when the Fusion
    // config enables `exportStorageCredentials()`; the endpoint is always exported.
    def 'should return env environment' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'x1', AWS_SECRET_ACCESS_KEY: 'y1', AWS_S3_ENDPOINT: 'http://my-host.com'])

        when:
        // plain mock: exportStorageCredentials() returns the default `false`
        def config = Mock(FusionConfig)
        def env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_S3_ENDPOINT:'http://my-host.com']

        when:
        config = Mock(FusionConfig) { exportStorageCredentials() >> true }
        env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_ACCESS_KEY_ID: 'x1',
                AWS_SECRET_ACCESS_KEY: 'y1',
                AWS_S3_ENDPOINT:'http://my-host.com']

        cleanup:
        SysEnv.pop()
    }

    // Server-side encryption settings of the `aws.client` scope are forwarded
    // through the FUSION_AWS_* variables.
    def 'should return env environment with SSE config' () {
        given:
        Global.config = [aws:[client: [storageEncryption:'aws:kms', storageKmsKeyId: 'xyz']]]

        when:
        def env = new AwsFusionEnv().getEnvironment('s3', Mock(FusionConfig))
        then:
        env == [FUSION_AWS_SERVER_SIDE_ENCRYPTION:'aws:kms', FUSION_AWS_SSEKMS_KEY_ID:'xyz']

        cleanup:
        Global.config = null
    }

    // Same as the credentials feature, with the session token exported alongside the keys.
    def 'should return env environment with session token' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'x1', AWS_SECRET_ACCESS_KEY: 'y1', AWS_S3_ENDPOINT: 'http://my-host.com', AWS_SESSION_TOKEN: 'z1'])

        when:
        // plain mock: exportStorageCredentials() returns the default `false`
        def config = Mock(FusionConfig)
        def env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_S3_ENDPOINT:'http://my-host.com']

        when:
        config = Mock(FusionConfig) { exportStorageCredentials() >> true }
        env = new AwsFusionEnv().getEnvironment('s3', config)
        then:
        env == [AWS_ACCESS_KEY_ID: 'x1',
                AWS_SECRET_ACCESS_KEY: 'y1',
                AWS_S3_ENDPOINT:'http://my-host.com',
                AWS_SESSION_TOKEN: 'z1']

        cleanup:
        SysEnv.pop()
    }
}

View File

@@ -0,0 +1,233 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio
import software.amazon.awssdk.core.sync.RequestBody
import software.amazon.awssdk.services.s3.model.GetObjectRequest
import software.amazon.awssdk.services.s3.model.HeadBucketRequest
import software.amazon.awssdk.services.s3.model.HeadObjectRequest
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel
import java.nio.file.Path
import java.nio.file.Paths
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model.CreateBucketRequest
import software.amazon.awssdk.services.s3.model.DeleteBucketRequest
import software.amazon.awssdk.services.s3.model.DeleteObjectRequest
import software.amazon.awssdk.services.s3.model.S3Exception
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request
import software.amazon.awssdk.services.s3.model.ListObjectVersionsRequest
import software.amazon.awssdk.services.s3.model.S3Object
import software.amazon.awssdk.services.s3.model.ObjectVersion
import software.amazon.awssdk.services.s3.model.PutObjectRequest
import nextflow.cloud.aws.util.S3PathFactory
import org.slf4j.Logger
import org.slf4j.LoggerFactory
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
trait AwsS3BaseSpec {
// Logger used by the helper methods of this trait
static final Logger log = LoggerFactory.getLogger(AwsS3BaseSpec)
// S3 client every helper operates on; must be provided by the implementing specification
abstract S3Client getS3Client()
/**
 * Parse the given string with {@code S3PathFactory} and return the result as an {@code S3Path}.
 */
S3Path s3path(String path) {
    final parsed = S3PathFactory.parse(path)
    return (S3Path) parsed
}
/**
 * Create a bucket with the given name.
 *
 * @return the same bucket name, for call chaining
 */
String createBucket(String bucketName) {
    final request = CreateBucketRequest.builder().bucket(bucketName).build() as CreateBucketRequest
    s3Client.createBucket(request)
    return bucketName
}
/**
 * Create a bucket with a random name.
 *
 * @return the name of the new bucket
 */
String createBucket() {
    final String bucketName = getRndBucketName()
    log.debug "Creating s3 bucket '$bucketName'"
    return createBucket(bucketName)
}
/**
 * @return a bucket name made of the {@code nf-s3fs-test-} prefix followed by a random UUID
 */
String getRndBucketName() {
    final uuid = UUID.randomUUID()
    return "nf-s3fs-test-${uuid}"
}
/**
 * String-path convenience overload: converts {@code path} to a {@code Path}
 * and delegates to {@code createObject(Path, String)}.
 */
def createObject(String path, String content) {
    final Path target = Paths.get(path)
    return createObject(target, content)
}
/**
 * Split a {@code bucket/key} location into a two-element list.
 *
 * @return {@code [bucket, key]}; the key is {@code null} when the path holds a
 *      single component. Empty components are dropped by the tokenizer, so a
 *      trailing slash is not preserved in the key.
 */
private List<String> splitName(path) {
    final List<String> tokens = path.toString().tokenize('/')
    final String bucket = tokens[0]
    if( tokens.size() == 1 )
        return [ bucket, null ]
    return [ bucket, tokens[1..-1].join('/') ]
}
/**
 * Upload {@code content} as the object located at {@code bucket/key}.
 *
 * @throws IllegalArgumentException when the path holds no object key
 * @return the response of the put request
 */
def createObject(Path path, String content) {
    log.debug "Creating s3 blob object '$path'"
    final List<String> parts = splitName(path)
    final String bucket = parts[0]
    final String key = parts[1]
    if( !key )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    final request = PutObjectRequest.builder().bucket(bucket).key(key).build() as PutObjectRequest
    final payload = RequestBody.fromBytes(content.bytes)
    return s3Client.putObject(request, payload)
}
/**
 * Create a directory marker, i.e. an empty object whose key ends with a slash.
 *
 * @param path Location in the form {@code bucket/dir}
 * @throws IllegalArgumentException when the path holds no key; previously the
 *      missing key was concatenated as the literal {@code null/}
 * @return the response of the put request
 */
def createDirectory(String path) {
    log.debug "Creating blob directory '$path'"
    def (bucketName, blobName) = splitName(path)
    if( !blobName )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    final request = PutObjectRequest.builder().bucket(bucketName).key(blobName + '/').build() as PutObjectRequest
    return s3Client.putObject(request, RequestBody.empty())
}
/**
 * Delete a single object.
 *
 * The key used to get a slash appended unconditionally (as in {@code createDirectory}),
 * so an object uploaded with {@code createObject} could never be removed.
 *
 * @param path Location in the form {@code bucket/key}. Pass {@code bucket/dir/}
 *      (trailing slash) to delete a directory marker.
 * @throws IllegalArgumentException when the path holds no object key
 * @return the response of the delete request
 */
def deleteObject(String path) {
    log.debug "Deleting blob object '$path'"
    def (bucketName, blobName) = splitName(path)
    if( !blobName )
        throw new IllegalArgumentException("There should be at least one dir level: $path")
    // `splitName` drops a trailing slash - restore it only when the caller supplied one
    if( path.endsWith('/') )
        blobName += '/'
    final request = DeleteObjectRequest.builder().bucket(bucketName).key(blobName).build() as DeleteObjectRequest
    return s3Client.deleteObject(request)
}
/**
 * Delete the bucket named by a single-component path.
 * Asserts that the path carries no object key.
 */
def deleteBucket(Path path) {
    log.debug "Deleting blob bucket '$path'"
    def (String bucketName, String blobName) = splitName(path)
    assert blobName == null
    return deleteBucket(bucketName)
}
/**
 * Empty a bucket and then delete it. Does nothing for a null or empty name.
 *
 * Objects are removed first, which is enough for an unversioned bucket; the
 * object versions are removed afterwards, as a versioned bucket requires.
 */
def deleteBucket(String bucketName) {
    log.debug "Deleting blob bucket '$bucketName'"
    if( !bucketName )
        return

    // pass 1: current objects, page by page
    final listObjects = ListObjectsV2Request.builder().bucket(bucketName).build() as ListObjectsV2Request
    for( def page : s3Client.listObjectsV2Paginator(listObjects) ) {
        for( S3Object object : page.contents() ) {
            final request = DeleteObjectRequest.builder().bucket(bucketName).key(object.key()).build() as DeleteObjectRequest
            s3Client.deleteObject(request)
        }
    }

    // pass 2: every stored object version
    final listVersions = ListObjectVersionsRequest.builder().bucket(bucketName).build() as ListObjectVersionsRequest
    for( def page : s3Client.listObjectVersionsPaginator(listVersions) ) {
        for( ObjectVersion version : page.versions() ) {
            final request = DeleteObjectRequest.builder().bucket(bucketName).key(version.key()).versionId(version.versionId()).build() as DeleteObjectRequest
            s3Client.deleteObject(request)
        }
    }

    // the bucket is empty now and can be removed
    final dropBucket = DeleteBucketRequest.builder().bucket(bucketName).build() as DeleteBucketRequest
    return s3Client.deleteBucket(dropBucket)
}
/**
 * Best-effort variant of {@code deleteBucket(String)}: any failure is logged
 * as a warning instead of being propagated, so it is safe in cleanup code.
 */
def tryDeleteBucket(String bucketName) {
    try {
        deleteBucket(bucketName)
    }
    catch (Throwable t) {
        // deliberately swallowed - a failed cleanup must not fail the calling test
        log.warn ("Unable to delete blob bucket '$bucketName' - Reason: ${t.message ?: t}")
    }
}
/**
 * String-path convenience overload of {@code existsPath(Path)}.
 */
boolean existsPath(String path) {
    log.debug "Check blob path exists '$path'"
    final Path target = Paths.get(path)
    return existsPath(target)
}
/**
 * Check whether a bucket ({@code bucket}) or an object ({@code bucket/key}) exists
 * by issuing a HEAD request.
 *
 * @return {@code false} when the service answers with HTTP 404
 * @throws IllegalArgumentException when no bucket name can be extracted
 * @throws S3Exception for any service error other than 404
 */
boolean existsPath(Path path) {
    log.debug "Check blob path exists '$path'"
    def (bucketName, blobName) = splitName(path)
    if( !bucketName )
        throw new IllegalArgumentException("Invalid S3 path $path")
    try {
        if( blobName ) {
            final headObject = HeadObjectRequest.builder().bucket(bucketName).key(blobName).build() as HeadObjectRequest
            s3Client.headObject(headObject)
            return true
        }
        // bucket-only path: the response object is coerced to the boolean result
        final headBucket = HeadBucketRequest.builder().bucket(bucketName).build() as HeadBucketRequest
        return s3Client.headBucket(headBucket)
    }
    catch (S3Exception e) {
        if( e.statusCode() != 404 )
            throw e
        return false
    }
}
/**
 * String-path convenience overload of {@code readObject(Path)}.
 */
String readObject(String path) {
    log.debug "Reading blob object '$path'"
    final Path target = Paths.get(path)
    return readObject(target)
}
/**
 * Download the object at {@code bucket/key} and return its whole content as text.
 */
String readObject(Path path) {
    log.debug "Reading blob object '$path'"
    final List<String> parts = splitName(path)
    final request = GetObjectRequest.builder().bucket(parts[0]).key(parts[1]).build() as GetObjectRequest
    final response = s3Client.getObject(request)
    return response.getText()
}
/**
 * Build random text made of newline-terminated UUID strings.
 *
 * @param size Minimum number of characters requested
 * @return text of at least {@code size} characters; whole lines are appended, so
 *      the result may be longer than requested. Empty when {@code size <= 0}.
 */
String randomText(int size) {
    final text = new StringBuilder()
    while( text.length() < size ) {
        text.append(UUID.randomUUID().toString()).append('\n')
    }
    return text.toString()
}
/**
 * Drain a channel through a buffer of {@code buffLen} bytes and return the
 * collected bytes as a string (decoded by {@code ByteArrayOutputStream.toString()}).
 * Reading stops at the first read that returns zero or a negative count.
 */
String readChannel(SeekableByteChannel sbc, int buffLen ) {
    final collected = new ByteArrayOutputStream()
    final ByteBuffer chunk = ByteBuffer.allocate(buffLen)
    int count = sbc.read(chunk)
    while( count > 0 ) {
        // the buffer is cleared before each read, so `position` is the number of bytes just read
        collected.write(chunk.array(), 0, chunk.position())
        chunk.clear()
        count = sbc.read(chunk)
    }
    return collected.toString()
}
/**
 * Write {@code content} to a channel in slices of at most {@code buffLen} bytes.
 *
 * @param channel Destination channel
 * @param content Text to write; encoded with {@code String.getBytes()}
 * @param buffLen Size of the intermediate buffer
 * @throws IllegalArgumentException when {@code buffLen} is negative, or zero
 *      while there is content to write (that used to loop forever)
 */
void writeChannel( SeekableByteChannel channel, String content, int buffLen ) {
    final byte[] bytes = content.getBytes()
    // `allocate` itself rejects a negative capacity
    final ByteBuffer buf = ByteBuffer.allocate(buffLen)
    if( buffLen == 0 && bytes.length > 0 )
        throw new IllegalArgumentException("Buffer length must be greater than zero")
    int offset = 0
    while( offset < bytes.length ) {
        final int len = Math.min(buffLen, bytes.length - offset)
        buf.clear()
        buf.put(bytes, offset, len)
        buf.flip()
        // a channel may accept fewer bytes than offered: keep writing until the slice is drained
        while( buf.hasRemaining() )
            channel.write(buf)
        offset += len
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,145 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio
import java.nio.file.AccessDeniedException
import java.nio.file.NoSuchFileException
import software.amazon.awssdk.awscore.exception.AwsErrorDetails
import software.amazon.awssdk.awscore.exception.AwsServiceException
import software.amazon.awssdk.core.exception.SdkClientException
import software.amazon.awssdk.core.exception.SdkException
import software.amazon.awssdk.services.s3.model.NoSuchBucketException
import software.amazon.awssdk.services.s3.model.NoSuchKeyException
import spock.lang.Specification
import spock.lang.Unroll
/**
* Tests for the AWS SDK → NIO exception conversion in {@link S3Client#convertAwsException}.
*/
class S3ClientTest extends Specification {
// Every feature calls the static S3Client.convertAwsException(exception, operation, bucket, key)
// and checks the type, the `file` / `message` and the preserved cause of the returned exception.
// `S3Client` is not imported from the AWS SDK in this file, so it resolves to the class of this package.

// Bucket-level "not found": the reported file is the bare bucket URI.
def 'should map NoSuchBucketException to NoSuchFileException'() {
given:
def aws = NoSuchBucketException.builder().message('nope').build()
when:
def result = S3Client.convertAwsException(aws, 'listObjects', 'my-bucket', null)
then:
result instanceof NoSuchFileException
result.file == 's3://my-bucket'
result.cause.is(aws)
}
// Object-level "not found": the reported file includes the key.
def 'should map NoSuchKeyException to NoSuchFileException'() {
given:
def aws = NoSuchKeyException.builder().message('missing').build()
when:
def result = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'path/to/obj')
then:
result instanceof NoSuchFileException
result.file == 's3://my-bucket/path/to/obj'
result.cause.is(aws)
}
// A generic service exception is classified by its HTTP status code: 404 means "no such file".
@Unroll
def 'should map HTTP #code to NoSuchFileException'() {
given:
def aws = AwsServiceException.builder()
.message('err')
.awsErrorDetails(AwsErrorDetails.builder().errorCode('X').build())
.statusCode(code)
.build()
when:
def result = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'key')
then:
result instanceof NoSuchFileException
result.file == 's3://my-bucket/key'
result.cause.is(aws)
where:
code << [404]
}
// 401 and 403 both map to "access denied".
@Unroll
def 'should map HTTP #code to AccessDeniedException'() {
given:
def aws = AwsServiceException.builder()
.message('denied')
.awsErrorDetails(AwsErrorDetails.builder().errorCode('X').build())
.statusCode(code)
.build()
when:
def result = S3Client.convertAwsException(aws, 'getObject', 'my-bucket', 'key')
then:
result instanceof AccessDeniedException
result.file == 's3://my-bucket/key'
result.cause.is(aws)
where:
code << [401, 403]
}
// Any other status (500 here) becomes an IOException whose message names the operation and the path.
def 'should map other AwsServiceException to generic IOException'() {
given:
def aws = AwsServiceException.builder()
.message('boom')
.awsErrorDetails(AwsErrorDetails.builder().errorCode('X').build())
.statusCode(500)
.build()
when:
def result = S3Client.convertAwsException(aws, 'putObject', 'my-bucket', 'k')
then:
result instanceof IOException
!(result instanceof NoSuchFileException)
!(result instanceof AccessDeniedException)
result.message.contains('putObject')
result.message.contains('s3://my-bucket/k')
result.cause.is(aws)
}
// Client-side failures carry no HTTP status; the result must be exactly IOException, not a subclass.
def 'should map non-service SdkException to generic IOException'() {
given:
SdkException aws = SdkClientException.builder().message('network down').build()
when:
def result = S3Client.convertAwsException(aws, 'listBuckets', null, null)
then:
result.getClass() == IOException
result.message.contains('listBuckets')
result.message.contains('s3://')
result.cause.is(aws)
}
// A null and an empty key are treated alike: no trailing slash after the bucket name.
def 'should format path without trailing slash when key is null or empty'() {
expect:
(S3Client.convertAwsException(NoSuchBucketException.builder().message('').build(), 'op', 'b', key) as NoSuchFileException).file == 's3://b'
where:
key << [null, '']
}
}

View File

@@ -0,0 +1,99 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import software.amazon.awssdk.services.s3.model.ServerSideEncryption
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class S3FileSystemProviderTest extends Specification {
// Creates a file system from a full `aws` configuration map and checks that each option
// reaches the client, the client factory or the file system properties.
def 'should create filesystem from config'(){
given:
// same shape as the `aws` scope: a `client` section plus top-level credentials and profile
def config = [
client: [
anonymous: true,
s3Acl: 'Private',
connectionTimeout: 20000,
endpoint: 'https://s3.eu-west-1.amazonaws.com',
maxConcurrency: 10,
maxNativeMemory: '500MB',
minimumPartSize: '7MB',
multipartThreshold: '32MB',
maxConnections: 100,
maxErrorRetry: 3,
socketTimeout: 20000,
requesterPays: true,
s3PathStyleAccess: true,
proxyHost: 'host.com',
proxyPort: 80,
proxyScheme: 'https',
proxyUsername: 'user',
proxyPassword: 'pass',
storageEncryption: 'AES256',
storageKmsKeyId: 'arn:key:id',
uploadMaxThreads: 15,
uploadChunkSize: '7MB',
uploadMaxAttempts: 4,
uploadRetrySleep: '200ms'
],
accessKey: '123456abc',
secretKey: '78910def',
profile: 'test'
]
def provider = new S3FileSystemProvider();
when:
def fs = provider.newFileSystem(new URI("s3:///bucket/key"), config) as S3FileSystem
then:
// the bucket name is the first path component of the URI
fs.getBucketName() == 'bucket'
def client = fs.getClient()
client.client != null
// string options are surfaced as SDK enum constants
client.cannedAcl == ObjectCannedACL.PRIVATE
client.storageEncryption == ServerSideEncryption.AES256
client.isRequesterPaysEnabled == true
client.kmsKeyId == 'arn:key:id'
client.factory.accessKey() == '123456abc'
client.factory.secretKey() == '78910def'
client.factory.profile() == 'test'
client.factory.config.s3Config.anonymous == true
client.factory.config.s3Config.endpoint == 'https://s3.eu-west-1.amazonaws.com'
client.factory.config.s3Config.pathStyleAccess == true
// file system properties are snake_case strings; memory sizes are in bytes and the
// '200ms' sleep is rendered as '200'
fs.properties().getProperty('proxy_host') == 'host.com'
fs.properties().getProperty('proxy_port') == '80'
fs.properties().getProperty('proxy_scheme') == 'https'
fs.properties().getProperty('proxy_username') == 'user'
fs.properties().getProperty('proxy_password') == 'pass'
fs.properties().getProperty('socket_timeout') == '20000'
fs.properties().getProperty('connection_timeout') == '20000'
fs.properties().getProperty('max_connections') == '100'
fs.properties().getProperty('max_error_retry') == '3'
fs.properties().getProperty('upload_max_attempts') == '4'
fs.properties().getProperty('upload_retry_sleep') == '200'
fs.properties().getProperty('upload_chunk_size') == '7340032' //7MB
fs.properties().getProperty('upload_max_threads') == '15'
fs.properties().getProperty('max_concurrency') == '10'
fs.properties().getProperty('max_native_memory') == '524288000' //500MB
fs.properties().getProperty('minimum_part_size') == '7340032' //7MB
fs.properties().getProperty('multipart_threshold') == '33554432' //32MB
}
}

View File

@@ -0,0 +1,135 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio
import java.nio.file.Files
import java.nio.file.Path
import java.util.concurrent.Callable
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import java.util.concurrent.Future
import java.util.concurrent.ThreadFactory
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeoutException
import java.util.zip.GZIPInputStream
import groovy.util.logging.Slf4j
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.util.S3PathFactory
import nextflow.file.FileHelper
import software.amazon.awssdk.core.ResponseInputStream
import spock.lang.IgnoreIf
import spock.lang.Shared
import spock.lang.Specification
/**
* Regression test for {@link S3FileSystemProvider#newInputStream} close-on-partial-read behavior.
*
* Before the fix, {@code newInputStream()} returned the raw {@code ResponseInputStream} from the
* AWS SDK. Closing it without reading to EOF would trigger Apache HTTP client's
* {@code ContentLengthInputStream.close()}, which drains the remaining response body to release
* the connection back to the pool. For a multi-GB object this blocked the caller for many
* minutes. The fix wraps the stream so {@code close()} calls {@code ResponseInputStream.abort()}
* instead.
*
* A dedicated spec is used because the test requires non-trivial orchestration that would
* clutter {@link AwsS3NioTest}:
* - the wall-clock bound must be enforced on the caller side — Spock {@code @Timeout} relies
* on {@code Thread.interrupt()}, which does not unblock a thread parked in
* {@code NioSocketImpl.timedRead()} on a native SSL read;
* - when the regression is present the worker thread cannot be stopped by interrupt; the
* spec captures the underlying {@link ResponseInputStream} so it can call {@code abort()}
* from the test thread on timeout to force-release the HTTP connection.
*
* The test reads the first line of a public ~1GB FASTQ in the {@code ngi-igenomes} bucket
* (eu-west-1, anonymous). Without the fix the run blows the 30s wall-clock bound; with the fix
* it completes in seconds.
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
@Slf4j
// skipped whenever the NXF_SMOKE environment variable is set
@IgnoreIf({ System.getenv('NXF_SMOKE') })
class S3InputStreamAbortTest extends Specification {
// Public object read by the test
static final String PUBLIC_FASTQ =
's3://ngi-igenomes/test-data/sarek/SRR7890919_WES_HCC1395BL-EA_normal_1.fastq.gz'
// Wall-clock bound for "read the first line and close"
static final long TIMEOUT_SECONDS = 30
// Single worker thread shared by the whole spec; created in setupSpec()
@Shared
private ExecutorService executor
def setupSpec() {
executor = Executors.newSingleThreadExecutor({ Runnable r ->
def t = new Thread(r, 's3-abort-test-worker')
t.daemon = true // so a hung worker cannot keep the JVM alive
return t
} as ThreadFactory)
}
def cleanupSpec() {
executor?.shutdownNow()
}
// Register the S3 file system with anonymous access and publish the same config through
// Global and a mocked Session. NOTE(review): these globals are not reset after the feature.
def setup() {
// Anonymous S3 access — ngi-igenomes is public, bucket lives in eu-west-1.
def cfg = [aws: [client: [anonymous: true], region: 'eu-west-1']]
FileHelper.getOrCreateFileSystemFor(URI.create('s3:///'), cfg.aws)
Global.config = cfg
Global.session = Mock(Session) { getConfig() >> cfg }
}
def 'close on a partially-consumed newInputStream should abort, not drain'() {
given: 'an S3 path to a large (~1GB) gzipped object'
final Path path = (Path) S3PathFactory.parse(PUBLIC_FASTQ)
and: 'open the stream on the test thread'
final InputStream raw = Files.newInputStream(path)
when: 'read the first line and close on a background thread, bounded by a wall-clock timeout'
final Future<String> future = executor.submit({
String line = null
raw.withCloseable { InputStream is -> // close() here is the code path under test
def gz = new GZIPInputStream(is)
def reader = new BufferedReader(new InputStreamReader(gz, 'ASCII'))
line = reader.readLine()
}
return line
} as Callable<String>)
String firstLine
try {
firstLine = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS)
}
catch (TimeoutException e) {
// Thread.interrupt() cannot unblock the native SSL read — forcibly release the
// HTTP connection by calling abort() on the underlying ResponseInputStream so the
// worker thread can exit instead of lingering until the full body has drained.
log.warn("Timed out after ${TIMEOUT_SECONDS}s waiting for close() — aborting underlying S3 stream")
// NOTE(review): `raw` is declared as InputStream, so abort() is resolved dynamically;
// this assumes the stream returned by Files.newInputStream exposes abort() — confirm.
raw.abort()
// rethrown so that the `noExceptionThrown()` condition below fails the feature
throw e
}
finally {
// no effect once the task has completed
future.cancel(true)
}
then: 'no timeout occurred and the first FASTQ record identifier was returned'
noExceptionThrown()
firstLine?.startsWith('@')
}
}

View File

@@ -0,0 +1,161 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.nio.util.S3MultipartOptions
import nextflow.file.FileHelper
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse
import software.amazon.awssdk.services.s3.model.UploadPartResponse
import spock.lang.IgnoreIf
import spock.lang.Requires
import spock.lang.Specification
import java.nio.file.Files
import java.nio.file.attribute.BasicFileAttributes
/**
* Test for S3OutputStream
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
class S3OutputStreamTest extends Specification implements AwsS3BaseSpec {
// SDK client backing the AwsS3BaseSpec helpers; assigned in setup()
private S3Client s3Client0
S3Client getS3Client() { s3Client0 }
// Build the `aws` config from the AWS_S3FS_* environment variables (null when a variable is unset)
static private Map config0() {
def accessKey = System.getenv('AWS_S3FS_ACCESS_KEY')
def secretKey = System.getenv('AWS_S3FS_SECRET_KEY')
return [aws: [accessKey: accessKey, secretKey: secretKey]]
}
// Runs before every feature, the mock-only one included: obtains the `s3:///` file system,
// keeps its SDK client and publishes the config through Global and a mocked Session.
def setup() {
def fs = (S3FileSystem) FileHelper.getOrCreateFileSystemFor(URI.create("s3:///"), config0().aws)
s3Client0 = fs.client.getClient()
and:
def cfg = config0()
Global.config = cfg
Global.session = Mock(Session) { getConfig() >> cfg }
}
// Integration feature (real bucket): ten flushed writes of one chunk each must produce ten parts.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should ensure multipart is used'() {
given:
def bucket = createBucket()
and:
// 6 MiB payload, used both as chunk size and as buffer size
def chunksize = 6 * 1024 * 1024
def bytes = new byte[chunksize]
new Random().nextBytes(bytes)
final path = s3path("s3://$bucket/file.txt")
def multipart = new S3MultipartOptions()
multipart.setChunkSize(chunksize)
multipart.setBufferSize(chunksize)
when:
def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
10.times { it ->
writer.write(bytes);
writer.flush()
}
writer.close()
then:
writer.partsCount == 10
existsPath(path)
Files.readAttributes(path, BasicFileAttributes).size() == 10 * chunksize
cleanup:
if( bucket ) deleteBucket(bucket)
}
// Integration feature: closing a stream that was never written to still creates an empty object.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should upload empty stream'() {
given:
def bucket = createBucket()
and:
final path = s3path("s3://$bucket/file.txt")
def multipart = new S3MultipartOptions()
when:
def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
writer.close()
then:
writer.partsCount == 0
existsPath(path)
Files.readAttributes(path, BasicFileAttributes).size() == 0
cleanup:
if( bucket ) deleteBucket(bucket)
}
// Integration feature: a small payload written with default options is uploaded
// without any multipart part.
@IgnoreIf({System.getenv('NXF_SMOKE')})
@Requires({System.getenv('AWS_S3FS_ACCESS_KEY') && System.getenv('AWS_S3FS_SECRET_KEY')})
def 'should upload without multipart'() {
given:
def bucket = createBucket()
and:
def TEXT = randomText(50 * 1024)
final path = s3path("s3://$bucket/file.txt")
def multipart = new S3MultipartOptions()
when:
def writer = new S3OutputStream(s3Client0, path.toS3ObjectId(), multipart)
writer.write(TEXT.bytes)
writer.close()
then:
writer.partsCount == 0
existsPath(path)
readObject(path) == TEXT
cleanup:
if( bucket ) deleteBucket(bucket)
}
// Mock-only feature: parts uploaded as 2, 0, 1 must reach completeMultipartUpload ordered 0, 1, 2.
def 'should send sorted parts to completeMultipartUpload'() {
given:
final path = s3path("s3://test/file.txt")
def multipart = new S3MultipartOptions()
def client = Mock(S3Client)
def capturedParts = null
def writer = new S3OutputStream(client, path.toS3ObjectId(), multipart)
when: 'simulate unsorted uploads'
writer.init()
// NOTE(review): the fourth argument is asserted below as the part number; the trailing
// boolean presumably marks the last part — confirm against S3OutputStream.uploadPart
writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 2, true)
writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 0, false)
writer.uploadPart(InputStream.nullInputStream(), 25, "checksum".bytes, 1, false)
writer.completeMultipartUpload()
then:
1 * client.createMultipartUpload(_) >> CreateMultipartUploadResponse.builder().uploadId("upload-id").build()
3 * client.uploadPart(_,_) >> { UploadPartResponse.builder().eTag('etag').build()}
// capture the parts carried by the completion request for the ordering assertions below
1 * client.completeMultipartUpload(_ as CompleteMultipartUploadRequest) >> { CompleteMultipartUploadRequest req ->
capturedParts = req.multipartUpload().parts()
return null
}
capturedParts[0].partNumber() == 0
capturedParts[1].partNumber() == 1
capturedParts[2].partNumber() == 2
}
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng
import nextflow.util.Duration
import nextflow.util.MemoryUnit
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class DownloadOptsTest extends Specification {
// Checks how DownloadOpts.from(...) resolves its settings: built-in defaults,
// `download_*` properties and NXF_S3_DOWNLOAD_* environment variables.

// Defaults returned when neither a property nor an environment value is supplied.
def 'should get default options' () {
given:
def props = new Properties()
when:
def opts = DownloadOpts.from(props)
then:
opts.numWorkers() == 10
opts.queueMaxSize() == 10_000
opts.bufferMaxSize() == MemoryUnit.of('1 GB')
opts.chunkSize() == 10 * 1024 * 1024
!opts.parallelEnabled()
opts.maxDelayMillis() == Duration.of('90s').toMillis()
opts.maxAttempts() == 5
}
// Values supplied through `download_*` properties.
// NOTE(review): `download_parallel = false` equals the default asserted in the feature above,
// so `!opts.parallelEnabled()` would also pass if the property were ignored.
def 'should set options with properties' () {
given:
def CONFIG = '''
download_parallel = false
download_queue_max_size = 11
download_buffer_max_size = 222MB
download_num_workers = 33
download_chunk_size = 44
download_max_attempts = 99
download_max_delay = 99s
'''
def props = new Properties()
props.load(new StringReader(CONFIG))
when:
def opts = DownloadOpts.from(props)
then:
opts.numWorkers() == 33
opts.queueMaxSize() == 11
opts.bufferMaxSize() == MemoryUnit.of('222 MB')
opts.chunkSize() == 44
!opts.parallelEnabled()
opts.maxAttempts() == 99
opts.maxDelayMillis() == Duration.of('99s').toMillis()
}
// Values supplied through the environment map passed as second argument (empty properties).
// NOTE(review): as above, NXF_S3_DOWNLOAD_PARALLEL is set to the default value `false`.
def 'should set options with env' () {
given:
def ENV = [
NXF_S3_DOWNLOAD_PARALLEL: 'false',
NXF_S3_DOWNLOAD_QUEUE_SIZE: '11',
NXF_S3_DOWNLOAD_NUM_WORKERS: '22',
NXF_S3_DOWNLOAD_CHUNK_SIZE: '33',
NXF_S3_DOWNLOAD_BUFFER_MAX_MEM: '44 G',
NXF_S3_DOWNLOAD_MAX_ATTEMPTS: '88',
NXF_S3_DOWNLOAD_MAX_DELAY: '88s'
]
when:
def opts = DownloadOpts.from(new Properties(), ENV)
then:
!opts.parallelEnabled()
opts.queueMaxSize() == 11
opts.numWorkers() == 22
opts.chunkSize() == 33
opts.bufferMaxSize() == MemoryUnit.of('44 GB')
opts.maxAttempts() == 88
opts.maxDelayMillis() == Duration.of('88s').toMillis()
}
}

View File

@@ -0,0 +1,64 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.ng
import java.util.concurrent.Executors
import java.util.function.Function
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class FutureInputStreamTest extends Specification {

    /**
     * Feeds ten parts through a {@code FutureIterator}; every part is loaded into
     * a chunk buffer holding the same payload. The resulting stream must yield the
     * payload repeated once per part, and the buffer factory must report a pool
     * size equal to its configured capacity afterwards.
     */
    def 'should read the stream ad give back the chunks' () {
        given:
        def message = "hello world!"
        def payload = message.bytes
        def chunkSize = payload.length + 2
        def repeats = 10
        def capacity = 1
        def factory = new ChunkBufferFactory(chunkSize, capacity)

        and:
        def pool = Executors.newFixedThreadPool(10)

        and:
        // one entry per part: 0, 1, ... repeats-1
        def parts = (0..<repeats).toList()
        Function<Integer,ChunkBuffer> loader = {
            def buffer = factory.create()
            buffer.fill( new ByteArrayInputStream(payload) )
            buffer.makeReadable()
            return buffer
        }

        when:
        def chunks = new FutureIterator(parts, loader, pool, capacity)
        def stream = new FutureInputStream(chunks)

        then:
        stream.text == message * repeats

        and:
        factory.getPoolSize() == capacity

        cleanup:
        pool.shutdownNow()
    }
}

View File

@@ -0,0 +1,126 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util
import software.amazon.awssdk.transfer.s3.S3TransferManager
import spock.lang.Specification
import spock.lang.Unroll
/**
* Test for ExtendedS3TransferManager
*
* @author Jorge Ejarque <jorge.ejarque@seqera.io>
*/
class ExtendedS3TransferManagerTest extends Specification {

    /** An empty property set must yield the part size and permit count asserted below. */
    def 'should initialize with default values'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()

        when:
        def manager = new ExtendedS3TransferManager(delegate, settings)

        then:
        manager.downloadPermits == 50 // 400MB / 8MB
        manager.partSize == 8 * 1024 * 1024 // 8 MB
    }

    /** Both sizing properties are supplied explicitly and must be reflected by the manager. */
    def 'should initialize with custom properties'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()
        settings.setProperty('max_download_heap_memory', '200000000') // 200 MB
        settings.setProperty('minimum_part_size', '16777216') // 16 MB

        when:
        def manager = new ExtendedS3TransferManager(delegate, settings)

        then:
        manager.partSize == 16 * 1024 * 1024 // 16 MB
        manager.downloadPermits == 11 // 200MB / 16MB (floor) = 11.92... -> 11
    }

    /** Data-driven check of {@code estimateParts} for several file-size / part-size pairs. */
    @Unroll
    def 'should estimate parts correctly'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()
        settings.setProperty('minimum_part_size', partSizeStr)
        def manager = new ExtendedS3TransferManager(delegate, settings)

        expect:
        manager.estimateParts(fileSize) == expectedParts

        where:
        fileSize  | partSizeStr | expectedParts
        1024      | '8388608'   | 1  // 1KB file, 8MB parts = 1 part
        8388608   | '8388608'   | 1  // 8MB file, 8MB parts = 1 part
        16777216  | '8388608'   | 2  // 16MB file, 8MB parts = 2 parts
        100000000 | '8388608'   | 10 // ~95MB file, 8MB parts = 10 parts (capped at DEFAULT_INIT_BUFFER_PARTS)
        500000000 | '8388608'   | 10 // ~476MB file, 8MB parts = 10 parts (capped at DEFAULT_INIT_BUFFER_PARTS)
        1048576   | '1048576'   | 1  // 1MB file, 1MB parts = 1 part
        10485760  | '1048576'   | 10 // 10MB file, 1MB parts = 10 parts (capped at DEFAULT_INIT_BUFFER_PARTS)
    }

    /** Data-driven check of {@code downloadPermits} for several heap-budget / part-size pairs. */
    def 'should calculate downloadPermits correctly'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()
        settings.setProperty('max_download_heap_memory', maxBuffer)
        settings.setProperty('minimum_part_size', partSize)

        when:
        def manager = new ExtendedS3TransferManager(delegate, settings)

        then:
        manager.downloadPermits == expectedMaxParts

        where:
        maxBuffer   | partSize   | expectedMaxParts
        '419430400' | '8388608'  | 50  // 400MB / 8MB
        '104857600' | '8388608'  | 12  // 100MB / 8MB
        '838860800' | '8388608'  | 100 // 800MB / 8MB
        '419430400' | '16777216' | 25  // 400MB / 16MB
    }

    /** Zero and negative sizes must both be estimated as a single part. */
    def 'should handle zero or negative file sizes in estimateParts'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()
        def manager = new ExtendedS3TransferManager(delegate, settings)

        expect:
        manager.estimateParts(-100) == 1
        manager.estimateParts(0) == 1
    }

    /** The largest possible {@code long} size must still produce the capped estimate. */
    def 'should handle large file sizes in estimateParts'() {
        given:
        def delegate = Mock(S3TransferManager)
        def settings = new Properties()
        def manager = new ExtendedS3TransferManager(delegate, settings)

        when:
        def estimate = manager.estimateParts(Long.MAX_VALUE)

        then:
        estimate == 10 // Should be capped at DEFAULT_INIT_BUFFER_PARTS
    }
}

View File

@@ -0,0 +1,95 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.nio.util
import nextflow.cloud.aws.config.AwsConfig
import software.amazon.awssdk.http.SdkHttpConfigurationOption
import spock.lang.Specification
/**
 * Checks that the legacy S3 properties produced by {@code AwsConfig} are mapped
 * onto the synchronous and asynchronous S3 client configuration objects.
 */
class S3ClientConfigurationTest extends Specification{

    def 'create S3 synchronous client configuration' (){
        given:
        def awsConfig = new AwsConfig([client: [connectionTimeout: 20000, maxConnections: 100, maxErrorRetry: 3, socketTimeout: 20000,
                                                proxyHost: 'host.com', proxyPort: 80, proxyScheme: 'https', proxyUsername: 'user', proxyPassword: 'pass']])
        def legacyProps = new Properties()
        legacyProps.putAll(awsConfig.getS3LegacyProperties())

        when:
        def syncConfig = S3SyncClientConfiguration.create(legacyProps)

        then:
        // maxErrorRetry: 3 is expected to surface as 4 attempts
        def overrides = syncConfig.getClientOverrideConfiguration()
        overrides.retryStrategy().get().maxAttempts() == 4

        and:
        def builder = syncConfig.getHttpClientBuilder()
        def proxy = builder.proxyConfiguration
        proxy.host() == 'host.com'
        proxy.port() == 80
        proxy.scheme() == 'https'
        proxy.username() == 'user'
        proxy.password() == 'pass'

        and:
        def options = builder.standardOptions
        options.get(SdkHttpConfigurationOption.CONNECTION_TIMEOUT).toMillis()== 20000
        options.get(SdkHttpConfigurationOption.READ_TIMEOUT).toMillis() == 20000 //socket timeout
        options.get(SdkHttpConfigurationOption.MAX_CONNECTIONS) == 100
    }

    def 'create S3 asynchronous client configuration with default socket timeout' (){
        given:
        def awsConfig = new AwsConfig([client: [:]])
        def legacyProps = new Properties()
        legacyProps.putAll(awsConfig.getS3LegacyProperties())

        when:
        def asyncConfig = S3AsyncClientConfiguration.create(legacyProps)

        then:
        def health = asyncConfig.getCrtHttpConfiguration().healthConfiguration()
        health.minimumThroughputInBps() == 1
        health.minimumThroughputTimeout().toMillis() == 30000
    }

    def 'create S3 asynchronous client configuration' (){
        given:
        def awsConfig = new AwsConfig([client: [
                maxConcurrency: 10, maxNativeMemory: '500MB', minimumPartSize: '7MB', multipartThreshold: '32MB',
                targetThroughputInGbps: 15, connectionTimeout: 20000, maxConnections: 100, maxErrorRetry: 3, socketTimeout: 20000,
                proxyHost: 'host.com', proxyPort: 80, proxyScheme: 'https', proxyUsername: 'user', proxyPassword: 'pass']])
        def legacyProps = new Properties()
        legacyProps.putAll(awsConfig.getS3LegacyProperties())

        when:
        def asyncConfig = S3AsyncClientConfiguration.create(legacyProps)

        then:
        def overrides = asyncConfig.getClientOverrideConfiguration()
        overrides.retryStrategy().get().maxAttempts() == 4

        and: 'CRT performance settings'
        asyncConfig.getMaxConcurrency() == 10
        asyncConfig.getMaxNativeMemoryInBytes() == 524288000L
        asyncConfig.getTargetThroughputInGbps() == 15

        and: 'multipart settings'
        def multipart = asyncConfig.getMultipartConfiguration()
        multipart.thresholdInBytes() == 33554432
        multipart.minimumPartSizeInBytes() == 7340032

        and: 'CRT http proxy settings'
        def crtHttp = asyncConfig.getCrtHttpConfiguration()
        def proxy = crtHttp.proxyConfiguration()
        proxy.host() == 'host.com'
        proxy.port() == 80
        proxy.scheme() == 'https'
        proxy.username() == 'user'
        proxy.password() == 'pass'

        and: 'timeout derived from socketTimeout'
        def health = crtHttp.healthConfiguration()
        health.minimumThroughputInBps() == 1
        health.minimumThroughputTimeout().toMillis() == 20000

        and: 'CRT retry settings'
        def retries = asyncConfig.getCrtRetryConfiguration()
        retries.numRetries() == 3
    }
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsHelperTest extends Specification {

    /**
     * Both the camel-case and the lower-case/hyphenated spellings must resolve to
     * the same canned ACL constant; an unrecognised name must be rejected with an
     * {@code IllegalArgumentException}.
     */
    def 'should parse S3 acl' () {
        expect:
        ObjectCannedACL.PRIVATE == AwsHelper.parseS3Acl('Private')
        ObjectCannedACL.PRIVATE == AwsHelper.parseS3Acl('private')
        ObjectCannedACL.PUBLIC_READ == AwsHelper.parseS3Acl('PublicRead')
        ObjectCannedACL.PUBLIC_READ == AwsHelper.parseS3Acl('public-read')

        when:
        AwsHelper.parseS3Acl('unknown')

        then:
        thrown(IllegalArgumentException)
    }
}

View File

@@ -0,0 +1,875 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.batch.AwsOptions
import nextflow.util.Duration
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
/**
 * Compares the Bash helper script rendered by {@code S3BashLib.script} against
 * literal expected scripts, for both the options-based overload and the
 * no-argument overload that reads its settings from {@code Global.session}.
 */
class S3BashLibTest extends Specification {

    /**
     * Several features below assign a mocked session to the static
     * {@code Global.session}. Reset it after every feature so the mock does not
     * leak into features (or other specifications) that run afterwards.
     */
    def cleanup() {
        Global.session = null
    }

    // -- legacy

    // Options mock stubbed with the plain `aws` CLI, no storage class and no encryption.
    def 'should get uploader script' () {
        given:
        def opts = Mock(AwsOptions)
        when:
        def script = S3BashLib.script(opts)
        then:
        1 * opts.getAwsCli() >> 'aws'
        1 * opts.getStorageClass() >> null
        1 * opts.getStorageEncryption() >> null
        script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''
        .stripIndent(true)
    }

    // Options mock stubbed with a custom CLI path, storage class, encryption and parallel-transfer limit.
    def 'should set storage class and encryption' () {
        given:
        def opts = Mock(AwsOptions)
        when:
        def script = S3BashLib.script(opts)
        then:
        opts.getStorageClass() >> 'S-CLAZZ'
        opts.getStorageEncryption() >> 'S-ENCRYPT'
        opts.getAwsCli() >> '/foo/bin/aws'
        opts.getMaxParallelTransfers() >> 33
        script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>33 )); then echo 33; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
/foo/bin/aws s3 cp --only-show-errors --sse S-ENCRYPT --storage-class S-CLAZZ - "$s3path"
elif [[ -d "$name" ]]; then
/foo/bin/aws s3 cp --only-show-errors --recursive --sse S-ENCRYPT --storage-class S-CLAZZ "$name" "$s3path/$name"
else
/foo/bin/aws s3 cp --only-show-errors --sse S-ENCRYPT --storage-class S-CLAZZ "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/foo/bin/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
/foo/bin/aws s3 cp --only-show-errors --recursive "$source" "$target"
else
/foo/bin/aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''
        .stripIndent(true)
    }

    // -- new test

    // No-argument overload with an empty session config.
    def 'should create base script' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [:]
        }
        expect:
        S3BashLib.script() == '''
# aws cli retry config
export AWS_RETRY_MODE=standard
export AWS_MAX_ATTEMPTS=5
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Session config selects the `legacy` retry mode with 100 transfer attempts.
    def 'should create base script with legacy retry mode' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch: [maxTransferAttempts: 100, retryMode: 'legacy']]]
        }
        expect:
        S3BashLib.script() == '''
# aws cli retry config
export AWS_RETRY_MODE=legacy
export AWS_MAX_ATTEMPTS=100
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // With the `built-in` retry mode the expected script carries no retry-config exports.
    def 'should create base script with built-in retry mode' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch: [retryMode: 'built-in']]]
        }
        expect:
        S3BashLib.script() == '''
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Session config supplies a custom CLI path together with retry settings.
    def 'should create base script with custom settings' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch: [cliPath: '/some/bin/aws', retryMode: 'legacy', maxTransferAttempts: 99]]]
        }
        expect:
        S3BashLib.script() == '''
# aws cli retry config
export AWS_RETRY_MODE=legacy
export AWS_MAX_ATTEMPTS=99
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
/some/bin/aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
/some/bin/aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
/some/bin/aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(/some/bin/aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
/some/bin/aws s3 cp --only-show-errors --recursive "$source" "$target"
else
/some/bin/aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Options mock stubbed with parallel-transfer, attempt-count and delay values.
    def 'should create base script with custom options' () {
        given:
        def opts = Mock(AwsOptions) {
            getMaxParallelTransfers() >> 5
            getMaxTransferAttempts() >> 10
            getDelayBetweenAttempts() >> Duration.of('20s')
        }
        expect:
        S3BashLib.script(opts) == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=10
local timeout=20
local attempt=0
local exitCode=0
while (( $attempt < $max_attempts ))
do
if "$@"
then
return 0
else
exitCode=$?
fi
if [[ $exitCode == 0 ]]
then
break
fi
nxf_sleep $timeout
attempt=$(( attempt + 1 ))
timeout=$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>5 )); then echo 5; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Options mock with no stubbed getters at all.
    def 'should create base script with options' () {
        given:
        def opts = Mock(AwsOptions)
        expect:
        S3BashLib.script(opts) == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( $attempt < $max_attempts ))
do
if "$@"
then
return 0
else
exitCode=$?
fi
if [[ $exitCode == 0 ]]
then
break
fi
nxf_sleep $timeout
attempt=$(( attempt + 1 ))
timeout=$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Real AwsOptions built from a session whose client config enables KMS storage encryption.
    def 'should create with storage encrypt' () {
        given:
        def sess1 = Mock(Session) {
            getConfig() >> [aws: [ client: [ storageKmsKeyId: 'my-kms-key', storageEncryption: 'aws:kms']]]
        }
        and:
        def opts = new AwsOptions(sess1)
        expect:
        S3BashLib.script(opts) == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=5
local timeout=10
local attempt=0
local exitCode=0
while (( $attempt < $max_attempts ))
do
if "$@"
then
return 0
else
exitCode=$?
fi
if [[ $exitCode == 0 ]]
then
break
fi
nxf_sleep $timeout
attempt=$(( attempt + 1 ))
timeout=$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws cli retry config
export AWS_RETRY_MODE=standard
export AWS_MAX_ATTEMPTS=5
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --sse aws:kms --sse-kms-key-id my-kms-key --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Real AwsOptions built from a session whose client config sets an S3 ACL.
    def 'should create with s3 acl' () {
        given:
        def sess1 = Mock(Session) {
            getConfig() >> [aws: [ client: [ s3Acl: 'PublicRead']]]
        }
        and:
        def opts = new AwsOptions(sess1)
        expect:
        S3BashLib.script(opts) == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=5
local timeout=10
local attempt=0
local exitCode=0
while (( $attempt < $max_attempts ))
do
if "$@"
then
return 0
else
exitCode=$?
fi
if [[ $exitCode == 0 ]]
then
break
fi
nxf_sleep $timeout
attempt=$(( attempt + 1 ))
timeout=$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws cli retry config
export AWS_RETRY_MODE=standard
export AWS_MAX_ATTEMPTS=5
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --acl public-read --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Session config selects the fargate platform with `s5cmd` as the CLI.
    def 'should create s5cmd script' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd']]]
        }
        expect:
        S3BashLib.script() == '''
# aws helper for s5cmd
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
local tmp=$(nxf_mktemp)
cp /dev/stdin $tmp/$name
s5cmd cp --storage-class STANDARD $tmp/$name "$s3path"
elif [[ -d "$name" ]]; then
s5cmd cp --storage-class STANDARD "$name/" "$s3path/$name/"
else
s5cmd cp --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(s5cmd ls $source | grep -F "DIR ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
s5cmd cp "$source/*" "$target"
else
s5cmd cp "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // Same s5cmd setup as above, with an S3 ACL added in the client config.
    def 'should create s5cmd script with acl' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch:[platformType: 'fargate', cliPath: 's5cmd'], client:[ s3Acl: 'PublicRead']]]
        }
        expect:
        S3BashLib.script() == '''
# aws helper for s5cmd
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
local tmp=$(nxf_mktemp)
cp /dev/stdin $tmp/$name
s5cmd cp --acl public-read --storage-class STANDARD $tmp/$name "$s3path"
elif [[ -d "$name" ]]; then
s5cmd cp --acl public-read --storage-class STANDARD "$name/" "$s3path/$name/"
else
s5cmd cp --acl public-read --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(s5cmd ls $source | grep -F "DIR ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
s5cmd cp "$source/*" "$target"
else
s5cmd cp "$source" "$target"
fi
}
'''.stripIndent(true)
    }

    // In the expected script only the recursive download carries --force-glacier-transfer.
    def 'should create script with force glacier transfer' () {
        given:
        Global.session = Mock(Session) {
            getConfig() >> [aws:[batch:[forceGlacierTransfer: true]]]
        }
        expect:
        S3BashLib.script() == '''
# aws cli retry config
export AWS_RETRY_MODE=standard
export AWS_MAX_ATTEMPTS=5
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive --force-glacier-transfer "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
    }
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import nextflow.cloud.aws.nio.S3Path
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class S3PathFactoryTest extends Specification {

    /**
     * Parses each URI string, checks the bucket and key of the resulting
     * {@code S3Path}, then checks that converting the path back to a URI string
     * returns the original input.
     */
    def 'should parse s3 paths' () {
        when:
        def parsed = S3PathFactory.parse(S3_PATH)

        then:
        parsed instanceof S3Path
        def s3 = parsed as S3Path
        s3.getBucket() == BUCKET
        s3.getKey() == KEY

        when:
        def uri = S3PathFactory.getUriString(parsed)

        then:
        uri == S3_PATH

        where:
        S3_PATH                                              | BUCKET     | KEY
        's3://cbcrg-eu/raw/x_r1.fq'                          | 'cbcrg-eu' | 'raw/x_r1.fq'
        's3://cbcrg-eu/raw/**_R1*{fastq,fq,fastq.gz,fq.gz}'  | 'cbcrg-eu' | 'raw/**_R1*{fastq,fq,fastq.gz,fq.gz}'
    }

    /** A doubled slash inside the key must collapse to a single slash in the URI string. */
    def 'should ignore double slashes' () {
        when:
        def parsed = S3PathFactory.parse('s3://cbcrg-eu/raw//x_r1.fq' )

        then:
        's3://cbcrg-eu/raw/x_r1.fq' == S3PathFactory.getUriString(parsed)
    }
}

View File

@@ -0,0 +1,107 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.aws.util
import nextflow.cloud.aws.nio.S3Path
import nextflow.file.FileHelper
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class S3PathTest extends Specification {

    /**
     * The URI string of a path must keep the `s3://` scheme; a bare bucket
     * is rendered with a trailing slash.
     */
    @Unroll
    def 'should convert to uri string' () {
        expect:
        FileHelper.asPath(LOCATION).toUriString() == RENDERED

        where:
        _ | LOCATION | RENDERED
        _ | 's3://foo' | 's3://foo/'
        _ | 's3://foo/bar' | 's3://foo/bar'
        _ | 's3://foo/b a r' | 's3://foo/b a r'
        _ | 's3://f o o/bar' | 's3://f o o/bar'
        _ | 's3://f_o_o/bar' | 's3://f_o_o/bar'
    }

    /**
     * The plain string form drops the scheme and starts with `/<bucket>`.
     */
    @Unroll
    def 'should convert to string' () {
        expect:
        FileHelper.asPath(LOCATION).toString() == RENDERED

        where:
        _ | LOCATION | RENDERED
        _ | 's3://foo' | '/foo/'
        _ | 's3://foo/bar' | '/foo/bar'
        _ | 's3://foo/b a r' | '/foo/b a r'
        _ | 's3://f o o/bar' | '/f o o/bar'
        _ | 's3://f_o_o/bar' | '/f_o_o/bar'
    }

    /**
     * Paths built from the same URI are equal and share a hash code; paths that
     * differ in key or in bucket are neither equal nor hash-equal.
     */
    def 'should check equals and hashcode' () {
        setup:
        def first = FileHelper.asPath('s3://foo/some/foo.txt')
        def twin = FileHelper.asPath('s3://foo/some/foo.txt')
        def otherKey = FileHelper.asPath('s3://foo/some/bar.txt')
        def otherBucket = FileHelper.asPath('s3://bar/some/foo.txt')

        expect:
        first == twin
        first != otherKey
        otherKey != otherBucket

        and:
        first.hashCode() == twin.hashCode()
        first.hashCode() != otherKey.hashCode()
        otherKey.hashCode() != otherBucket.hashCode()
    }

    /**
     * The bucket name is taken from the first segment of the URI path;
     * an empty path yields `null`.
     */
    @Unroll
    def 'should determine bucket name' () {
        expect:
        S3Path.bucketName(new URI(URI_PATH)) == BUCKET

        where:
        URI_PATH | BUCKET
        's3:///' | null
        's3:///foo' | 'foo'
        's3:///foo/' | 'foo'
        's3:///foo/bar' | 'foo'
    }

    /**
     * Normalisation resolves `.` and `..` segments and collapses repeated slashes.
     */
    @Unroll
    def 'should normalise path' () {
        expect:
        FileHelper.asPath(RAW).normalize() == FileHelper.asPath(NORMALISED)

        where:
        RAW | NORMALISED
        's3://foo' | 's3://foo'
        's3://foo/x/y/z.txt' | 's3://foo/x/y/z.txt'
        's3://foo/x/y/./z.txt' | 's3://foo/x/y/z.txt'
        's3://foo/x/y/../z.txt' | 's3://foo/x/z.txt'
        's3://foo/x/y/../../z.txt' | 's3://foo/z.txt'
        's3://foo/x/y//z.txt' | 's3://foo/x/y/z.txt'
        's3://foo/./z.txt' | 's3://foo/z.txt'
    }
}

View File

@@ -0,0 +1,146 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.executor
import java.nio.file.Path
import nextflow.Session
import nextflow.SysEnv
import nextflow.cloud.aws.batch.AwsBatchExecutor
import nextflow.cloud.aws.batch.AwsOptions
import nextflow.cloud.aws.util.S3PathFactory
import nextflow.processor.TaskHandler
import nextflow.processor.TaskRun
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class AwsBatchExecutorTest extends Specification {

    /**
     * Checks `isFusionEnabled()` against combinations of session config and the
     * `FUSION_ENABLED` environment variable; an explicit config value wins over
     * the environment.
     */
    def 'should check is fusion' () {
        given:
        SysEnv.push(ENV)
        and:
        def sess = Mock(Session) {
            getConfig() >> CONFIG
        }
        def executor = new AwsBatchExecutor(session: sess)
        expect:
        executor.isFusionEnabled() == EXPECTED
        cleanup:
        // restore the environment pushed in `given:` so later features are unaffected
        SysEnv.pop()
        where:
        CONFIG | ENV | EXPECTED
        [:] | [:] | false
        [fusion:[enabled: true]] | [:] | true
        [fusion:[enabled: false]] | [FUSION_ENABLED:'true'] | false // <-- config has priority
        [:] | [FUSION_ENABLED:'true'] | true
        [:] | [FUSION_ENABLED:'false'] | false
    }

    /**
     * `shouldDeleteJob` is expected to answer `true` only the first time a given
     * job id is seen and `false` on every later call with the same id.
     */
    def 'should validate shouldDeleteJob method' () {
        given:
        def executor = Spy(AwsBatchExecutor)
        expect:
        executor.shouldDeleteJob('job-1')
        executor.shouldDeleteJob('job-2')
        executor.shouldDeleteJob('job-3')
        and:
        !executor.shouldDeleteJob('job-1')
        !executor.shouldDeleteJob('job-1')
        !executor.shouldDeleteJob('job-2')
        !executor.shouldDeleteJob('job-2')
        !executor.shouldDeleteJob('job-3')
        !executor.shouldDeleteJob('job-3')
    }

    /**
     * Checks the name of the array index variable and the index start value.
     */
    def 'should get array index variable and start' () {
        given:
        def executor = Spy(AwsBatchExecutor)
        expect:
        executor.getArrayIndexName() == 'AWS_BATCH_JOB_ARRAY_INDEX'
        executor.getArrayIndexStart() == 0
    }

    /**
     * The array task id is the job id and the task index joined by a colon.
     */
    @Unroll
    def 'should get array task id' () {
        given:
        def executor = Spy(AwsBatchExecutor)
        expect:
        executor.getArrayTaskId(JOB_ID, TASK_INDEX) == EXPECTED
        where:
        JOB_ID | TASK_INDEX | EXPECTED
        'foo' | 1 | 'foo:1'
        'bar' | 2 | 'bar:2'
    }

    /**
     * Helper that builds an S3 path from a string starting with `/`,
     * e.g. `/foo/work/dir` is parsed as `s3://foo/work/dir`.
     */
    protected Path s3(String path) {
        S3PathFactory.parse('s3:/' + path)
    }

    /**
     * Checks the work directory string returned by `getArrayWorkDir` for an S3
     * work dir, an S3 work dir with Fusion enabled, and a default file system
     * work dir.
     *
     * NOTE: this feature was previously also named 'should get array task id',
     * duplicating the feature above although it exercises `getArrayWorkDir`.
     */
    @Unroll
    def 'should get array work dir' () {
        given:
        def executor = Spy(AwsBatchExecutor) {
            isFusionEnabled()>>FUSION
            isWorkDirDefaultFS()>>DEFAULT_FS
        }
        and:
        def handler = Mock(TaskHandler) {
            getTask() >> Mock(TaskRun) { getWorkDir() >> WORK_DIR }
        }
        expect:
        executor.getArrayWorkDir(handler) == EXPECTED
        where:
        FUSION | DEFAULT_FS | WORK_DIR | EXPECTED
        false | false | s3('/foo/work/dir') | 's3://foo/work/dir'
        true | false | s3('/foo/work/dir') | '/fusion/s3/foo/work/dir'
        false | true | Path.of('/nfs/work') | '/nfs/work'
    }

    /**
     * Checks the launch command produced by `getArrayLaunchCommand` for the AWS
     * CLI, s5cmd, Fusion and default file system variants.
     */
    def 'should get array launch command' (){
        given:
        def executor = Spy(AwsBatchExecutor) {
            isFusionEnabled()>>FUSION
            isWorkDirDefaultFS()>>DEFAULT_FS
            getAwsOptions() >> Mock(AwsOptions) {
                getS5cmdPath() >> { S5CMD ? 's5cmd' : null }
                getAwsCli() >> { 'aws' }
            }
        }
        expect:
        executor.getArrayLaunchCommand(TASK_DIR) == EXPECTED
        where:
        FUSION | DEFAULT_FS | S5CMD | TASK_DIR | EXPECTED
        false | false | false | 's3://foo/work/dir' | 'bash -o pipefail -c \'trap "[[ -n \\$pid ]] && kill -TERM \\$pid" TERM; trap "{ ret=$?; aws s3 cp --only-show-errors .command.log s3://foo/work/dir/.command.log||true; exit $ret; }" EXIT; aws s3 cp --only-show-errors s3://foo/work/dir/.command.run - | bash > >(tee .command.log) 2>&1 & pid=$!; wait $pid\''
        false | false | true | 's3://foo/work/dir' | 'bash -o pipefail -c \'trap "[[ -n \\$pid ]] && kill -TERM \\$pid" TERM; trap "{ ret=$?; s5cmd cp .command.log s3://foo/work/dir/.command.log||true; exit $ret; }" EXIT; s5cmd cat s3://foo/work/dir/.command.run | bash > >(tee .command.log) 2>&1 & pid=$!; wait $pid\''
        and:
        true | false | false | '/fusion/work/dir' | 'bash /fusion/work/dir/.command.run'
        false | true | false | '/nfs/work/dir' | 'bash /nfs/work/dir/.command.run 2>&1 > /nfs/work/dir/.command.log'
    }
}

View File

@@ -0,0 +1,217 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.executor
import java.nio.file.Paths
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.batch.AwsBatchFileCopyStrategy
import nextflow.cloud.aws.batch.AwsOptions
import nextflow.cloud.aws.util.S3PathFactory
import nextflow.processor.TaskBean
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
// Checks the `unstage_outputs` and `helpers_script` bindings produced by
// BashWrapperBuilder when the task target directory is an S3 path.
class BashWrapperBuilderWithS3Test extends Specification {
// Uses SimpleFileCopyStrategy with a target path and an output file name that
// both contain a blank, and expects the blanks to be backslash-escaped in the
// generated upload loop. The helpers script must hold the AWS CLI retry
// settings plus the nxf_s3_upload / nxf_s3_download functions.
def 'should include s3 helpers' () {
given:
Global.session = Mock(Session) { getConfig() >> [:] }
and:
def folder = Paths.get('/work/dir')
def target = S3PathFactory.parse('s3://some/buck et') // <-- path with blank
def bean = new TaskBean([
name: 'Hello 1',
workDir: folder,
targetDir: target,
scratch: true,
outputFiles: ['test.bam','test.bai', 'bla nk.txt'], // <-- file name with blank
script: 'echo Hello world!',
])
def copy = new SimpleFileCopyStrategy(bean)
/*
* simple bash run
*/
when:
def binding = new BashWrapperBuilder(bean,copy).makeBinding()
then:
binding.unstage_outputs == '''\
IFS=$'\\n'
for name in $(eval "ls -1d test.bam test.bai bla\\ nk.txt" | sort | uniq); do
nxf_s3_upload $name s3://some/buck\\ et
done
unset IFS
'''.stripIndent().rightTrim()
binding.helpers_script == '''\
# aws cli retry config
export AWS_RETRY_MODE=standard
export AWS_MAX_ATTEMPTS=5
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
// Uses AwsBatchFileCopyStrategy with a mocked AwsOptions. The uploads are
// expected to be collected into an array and run through nxf_parallel, and the
// helpers script must additionally contain the nxf_cp_retry and nxf_parallel
// bash functions ahead of the S3 helpers.
def 'should include s3 helpers and bash lib' () {
given:
Global.session = Mock(Session) { getConfig() >> [:] }
and:
def folder = Paths.get('/work/dir')
def target = S3PathFactory.parse('s3://some/bucket')
def bean = new TaskBean([
name: 'Hello 1',
workDir: folder,
targetDir: target,
scratch: true,
outputFiles: ['test.bam','test.bai'],
script: 'echo Hello world!',
])
def copy = new AwsBatchFileCopyStrategy(bean, Mock(AwsOptions))
/*
* simple bash run
*/
when:
def binding = new BashWrapperBuilder(bean,copy).makeBinding()
then:
binding.unstage_outputs == '''\
uploads=()
IFS=$'\\n'
for name in $(eval "ls -1d test.bam test.bai" | sort | uniq); do
uploads+=("nxf_s3_upload '$name' s3://some/bucket")
done
unset IFS
nxf_parallel "${uploads[@]}"
'''.stripIndent()
binding.helpers_script == '''\
# bash helper functions
nxf_cp_retry() {
local max_attempts=1
local timeout=10
local attempt=0
local exitCode=0
while (( \$attempt < \$max_attempts ))
do
if "\$@"
then
return 0
else
exitCode=\$?
fi
if [[ \$exitCode == 0 ]]
then
break
fi
nxf_sleep \$timeout
attempt=\$(( attempt + 1 ))
timeout=\$(( timeout * 2 ))
done
}
nxf_parallel() {
IFS=$'\\n\'
local cmd=("$@")
local cpus=$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c)
local max=$(if (( cpus>4 )); then echo 4; else echo $cpus; fi)
local i=0
local pid=()
(
set +u
while ((i<${#cmd[@]})); do
local copy=()
for x in "${pid[@]}"; do
# if the process exist, keep in the 'copy' array, otherwise wait on it to capture the exit code
# see https://github.com/nextflow-io/nextflow/pull/4050
[[ -e /proc/$x ]] && copy+=($x) || wait $x
done
pid=("${copy[@]}")
if ((${#pid[@]}>=$max)); then
nxf_sleep 0.2
else
eval "${cmd[$i]}" &
pid+=($!)
((i+=1))
fi
done
for p in "${pid[@]}"; do
wait $p
done
)
unset IFS
}
# aws helper
nxf_s3_upload() {
local name=$1
local s3path=$2
if [[ "$name" == - ]]; then
aws s3 cp --only-show-errors --storage-class STANDARD - "$s3path"
elif [[ -d "$name" ]]; then
aws s3 cp --only-show-errors --recursive --storage-class STANDARD "$name" "$s3path/$name"
else
aws s3 cp --only-show-errors --storage-class STANDARD "$name" "$s3path/$name"
fi
}
nxf_s3_download() {
local source=$1
local target=$2
local file_name=$(basename $1)
local is_dir=$(aws s3 ls $source | grep -F "PRE ${file_name}/" -c)
if [[ $is_dir == 1 ]]; then
aws s3 cp --only-show-errors --recursive "$source" "$target"
else
aws s3 cp --only-show-errors "$source" "$target"
fi
}
'''.stripIndent(true)
}
}

View File

@@ -0,0 +1,117 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.executor
import java.nio.file.Path
import nextflow.Global
import nextflow.SysEnv
import nextflow.cloud.aws.util.S3PathFactory
import nextflow.fusion.FusionScriptLauncher
import nextflow.processor.TaskBean
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class FusionScriptLauncherS3Test extends Specification {

    /**
     * S3 paths are expected to be mapped under `/fusion/s3/<bucket>/<key>`.
     */
    def 'should get container mount' () {
        given:
        Global.config = Collections.emptyMap()
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', null)

        when:
        def result = fusion.toContainerMount(S3PathFactory.parse('s3://foo/a/b/c.txt'))
        then:
        result == Path.of('/fusion/s3/foo/a/b/c.txt')

        when:
        result = fusion.toContainerMount(S3PathFactory.parse('s3://foo/a/x/y.txt'))
        then:
        result == Path.of('/fusion/s3/foo/a/x/y.txt')

        when:
        result = fusion.toContainerMount(S3PathFactory.parse('s3://bar/z.txt'))
        then:
        result == Path.of('/fusion/s3/bar/z.txt')

        cleanup:
        // reset the global config so it does not leak into other features,
        // as the credential-related features of this spec already do
        Global.config = null
    }

    /**
     * With `AWS_S3_ENDPOINT` set in the environment, the Fusion environment is
     * expected to carry the endpoint together with the work dir and tags.
     */
    def 'should get fusion env with s3 endpoint' () {
        given:
        Global.config = [:]
        and:
        SysEnv.push([AWS_S3_ENDPOINT: 'http://foo.com'])
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_S3_ENDPOINT: 'http://foo.com',
                FUSION_WORK: '/fusion/s3/foo/work',
                FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        // reset the global config so it does not leak into other features
        Global.config = null
        SysEnv.pop()
    }

    /**
     * With `fusion.exportAwsAccessKeys` enabled, the AWS keys found in the
     * environment are expected to be exported to the Fusion environment.
     */
    def 'should get fusion env with aws credentials' () {
        given:
        SysEnv.push([AWS_ACCESS_KEY_ID: 'xxx', AWS_SECRET_ACCESS_KEY: 'zzz'])
        and:
        Global.config = [fusion: [exportAwsAccessKeys: true]]
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_ACCESS_KEY_ID: 'xxx',
                AWS_SECRET_ACCESS_KEY: 'zzz',
                FUSION_WORK: '/fusion/s3/foo/work',
                FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        Global.config = null
        SysEnv.pop()
    }

    /**
     * With an empty environment, the keys and the endpoint defined in the `aws`
     * config scope are expected to be exported to the Fusion environment.
     */
    def 'should get fusion env with aws credentials in nextflow config' () {
        given:
        SysEnv.push([:])
        and:
        def CONFIG = [fusion: [exportAwsAccessKeys: true], aws: [accessKey: 'k1', secretKey: 's1', client: [endpoint: 'http://minio.com']]]
        Global.config = CONFIG
        and:
        def fusion = new FusionScriptLauncher(Mock(TaskBean), 's3', S3PathFactory.parse('s3://foo/work'))

        expect:
        fusion.fusionEnv() == [AWS_ACCESS_KEY_ID: 'k1',
                AWS_SECRET_ACCESS_KEY: 's1',
                AWS_S3_ENDPOINT: 'http://minio.com',
                FUSION_WORK: '/fusion/s3/foo/work',
                FUSION_TAGS: "[.command.*|.exitcode|.fusion.*](nextflow.io/metadata=true),[*](nextflow.io/temporary=true)"
        ]

        cleanup:
        Global.config = null
        SysEnv.pop()
    }
}

View File

@@ -0,0 +1,63 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.extension
import java.nio.file.Path
import java.nio.file.Paths
import groovyx.gpars.dataflow.DataflowReadChannel
import nextflow.Global
import nextflow.Session
import test.BaseSpec
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class PublishOpS3Test extends BaseSpec {

    /**
     * The task directory is expected to be the two-level sub-directory below
     * either the session work dir or the session bucket dir; a file that is not
     * nested deeply enough yields `null`.
     */
    def 'should infer task dir' () {
        setup:
        Global.config = Collections.emptyMap()

        and:
        def localBase = '/some/work/dir' as Path
        def remoteBase = 's3://other/bucket/dir' as Path
        def session = Mock(Session) {
            getWorkDir() >> localBase
            getBucketDir() >> remoteBase
        }
        def publishOp = new PublishOp(session, 'foo', Mock(DataflowReadChannel), [to:'/target'])

        when:
        def taskDir = publishOp.getTaskDir( localBase.resolve('xx/yyyy/this/and/that.txt') )
        then:
        taskDir == Paths.get('/some/work/dir/xx/yyyy')

        when:
        taskDir = publishOp.getTaskDir( remoteBase.resolve('pp/qqqq/other/file.fasta') )
        then:
        taskDir == 's3://other/bucket/dir/pp/qqqq' as Path

        when:
        taskDir = publishOp.getTaskDir( localBase.resolve('xx/foo.txt') )
        then:
        taskDir == null
    }
}

View File

@@ -0,0 +1,85 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.file
import java.nio.file.Path
import nextflow.Global
import nextflow.Session
import nextflow.SysEnv
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class FileHelperS3Test extends Specification {

    /**
     * With `NXF_FILE_ROOT` pointing at an S3 location, relative values are
     * expected to resolve against that root while absolute paths stay untouched.
     */
    @Unroll
    def 'should convert to canonical path with base' () {
        setup:
        SysEnv.push([NXF_FILE_ROOT: 's3://host.com/work'])

        expect:
        FileHelper.toCanonicalPath(INPUT) == CANONICAL

        cleanup:
        SysEnv.pop()

        where:
        INPUT | CANONICAL
        null | null
        'file.txt' | FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        Path.of('file.txt') | FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        and:
        './file.txt' | FileSystemPathFactory.parse('s3://host.com/work/file.txt')
        '.' | FileSystemPathFactory.parse('s3://host.com/work')
        './' | FileSystemPathFactory.parse('s3://host.com/work')
        '../file.txt' | FileSystemPathFactory.parse('s3://host.com/file.txt')
        and:
        '/file.txt' | Path.of('/file.txt')
        Path.of('/file.txt') | Path.of('/file.txt')
    }

    /**
     * The canonical form of an S3 URI string is expected to have repeated
     * slashes collapsed.
     */
    def 'should convert to a canonical path' () {
        setup:
        Global.session = Mock(Session) { getConfig() >> [:] }

        expect:
        FileHelper.toCanonicalPath(INPUT).toUri() == TARGET_URI

        where:
        INPUT | TARGET_URI
        's3://foo/some/file.txt' | new URI('s3:///foo/some/file.txt')
        's3://foo/some///file.txt' | new URI('s3:///foo/some/file.txt')
    }

    /**
     * `asPath` is expected to drop consecutive slashes found in the key part.
     */
    @Unroll
    def 'should remove consecutive slashes in the path' () {
        setup:
        Global.session = Mock(Session) { getConfig() >> [:] }

        expect:
        FileHelper.asPath(INPUT).toUri() == TARGET_URI

        where:
        INPUT | TARGET_URI
        's3://foo//this/that' | new URI('s3:///foo/this/that')
        's3://foo//this///that' | new URI('s3:///foo/this/that')
    }
}

View File

@@ -0,0 +1,76 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.processor
import java.nio.file.FileSystems
import java.nio.file.Files
import nextflow.Global
import nextflow.Session
import nextflow.cloud.aws.nio.S3Path
import nextflow.file.FileHelper
import spock.lang.Specification
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
class PublishDirS3Test extends Specification {

    /**
     * A `symlink` publish mode targeting an S3 path from the default file
     * system is expected to be switched to `COPY` by validation.
     */
    def 'should change mode to `copy`' () {
        setup:
        def taskProcessor = [:] as TaskProcessor
        taskProcessor.name = 'foo'
        def s3Target = FileHelper.asPath( 's3://bucket/work' )
        def publishDir = new PublishDir(mode:'symlink', path: s3Target, sourceFileSystem: FileSystems.default)

        when:
        publishDir.validatePublishMode()

        then:
        publishDir.mode == PublishDir.Mode.COPY
    }

    /**
     * The tags configured on the publisher are expected to be attached to the
     * S3 destination path handed over to `safeProcessFile`.
     */
    def 'should tag files' () {
        setup:
        def tempDir = Files.createTempDirectory('test')
        def localFile = tempDir.resolve('hello.txt')
        localFile.text = 'Hello'

        and:
        def taskProcessor = [:] as TaskProcessor
        taskProcessor.name = 'foo'

        and:
        def s3Target = FileHelper.asPath( 's3://bucket/work' )
        def publishDir = new PublishDir(tags: [FOO:'this',BAR:'that'], path: s3Target, sourceFileSystem: FileSystems.default)
        def spied = Spy(publishDir)

        when:
        spied.apply1(localFile, true)

        then:
        1 * spied.safeProcessFile(localFile, _) >> { origin, destination ->
            assert destination instanceof S3Path
            assert (destination as S3Path).getTagsList().find{ it.key()=='FOO'}.value() == 'this'
            assert (destination as S3Path).getTagsList().find{ it.key()=='BAR'}.value() == 'that'
        }

        cleanup:
        tempDir?.deleteDir()
    }
}

View File

@@ -0,0 +1,164 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.util
import nextflow.cloud.aws.util.ConfigParser
import spock.lang.Specification
import spock.lang.Unroll
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
// Exercises ConfigParser: section header parsing, section content collection,
// merging of several config texts and key extraction.
class ConfigParserTest extends Specification {
// A line yields a section name only when it is fully wrapped in square
// brackets; a leading `profile ` prefix is not part of the returned name.
def 'should parse section' () {
given:
def parser = new ConfigParser()
expect:
parser.parseSection(LINE) == EXPECTED
where:
LINE | EXPECTED
'foo' | null
'[foo' | null
and:
'[foo]' | 'foo'
'[profile foo]' | 'foo'
}
// Lines following a section header are collected, in order, under that section name.
def 'should parse config' () {
given:
def parser = new ConfigParser()
def CONFIG = '''
[foo]
one = 1
two = 2
[bar]
alpha = 3
gamma = 4
'''.stripIndent()
when:
parser.parseConfig(CONFIG)
then:
parser.content.size() == 2
parser.content['foo'] == ['one = 1', 'two = 2']
parser.content['bar'] == ['alpha = 3', 'gamma = 4']
}
// When a later config repeats a key already present in the section (`b3`),
// the first value is kept and only the new keys (`b4`) are appended.
def 'should not merge overlapping keys' () {
given:
def parser = new ConfigParser()
def CONFIG1 = '''
[alpha]
a1=1
[beta]
b2=2
b3=3
'''.stripIndent()
def CONFIG2 = '''
[beta]
b3=30
b4=4
'''.stripIndent()
when:
parser.parseConfig(CONFIG1)
parser.parseConfig(CONFIG2)
then:
parser.content.size() == 2
and:
parser.content['alpha'] == ['a1']
parser.content['beta'] == ['b2=2','b3=3','b4=4']
}
// Parsing three configs in sequence accumulates the entries per section, and
// `text()` renders the merged sections back in first-seen order.
def 'should load and merge config' () {
given:
def parser = new ConfigParser()
def CONFIG1 = '''
[alpha]
a1
'''.stripIndent()
def CONFIG2 = '''
[beta]
b1
'''.stripIndent()
def CONFIG3 = '''
[alpha]
a2
[beta]
b2
[omega]
z9
'''.stripIndent()
when:
parser.parseConfig(CONFIG1)
parser.parseConfig(CONFIG2)
parser.parseConfig(CONFIG3)
then:
parser.content.size() == 3
and:
parser.content['alpha'] == ['a1','a2']
parser.content['beta'] == ['b1','b2']
parser.content['omega'] == ['z9']
expect:
parser.text() == '''\
[alpha]
a1
a2
[beta]
b1
b2
[omega]
z9
'''.stripIndent()
}
// A key is the trimmed text before the `=` sign; a line without `=` has no key.
@Unroll
def 'should match key' () {
given:
def parser = new ConfigParser()
expect:
parser.findKey(LINE) == EXPECTED
where:
LINE | EXPECTED
'foo' | null
'foo=' | 'foo'
'foo=1' | 'foo'
' foo = 1 ' | 'foo'
' foo =1 ' | 'foo'
}
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.util
import nextflow.cloud.aws.util.S3PathFactory
import spock.lang.Specification
/**
*
* @author Ben Sherman <bentshermann@gmail.com>
*/
class S3PathSerializerTest extends Specification {

    /**
     * An S3 path serialised with Kryo is expected to deserialise to an
     * `S3Path` instance equal to the original one.
     */
    def 'should serialise s3 path' () {
        given:
        def original = S3PathFactory.parse('s3://mybucket/file.txt')
        def bytes = KryoHelper.serialize(original)

        expect:
        KryoHelper.deserialize(bytes).getClass().getName() == 'nextflow.cloud.aws.nio.S3Path'
        KryoHelper.deserialize(bytes) == S3PathFactory.parse('s3://mybucket/file.txt')
    }

    /**
     * Same round trip as above for a key that contains blanks.
     */
    def 'should serialise s3 path with spaces' () {
        given:
        def original = S3PathFactory.parse('s3://mybucket/file with spaces.txt')
        def bytes = KryoHelper.serialize(original)

        expect:
        KryoHelper.deserialize(bytes).getClass().getName() == 'nextflow.cloud.aws.nio.S3Path'
        KryoHelper.deserialize(bytes) == S3PathFactory.parse('s3://mybucket/file with spaces.txt')
    }
}

View File

@@ -0,0 +1,18 @@
#
# Copyright 2013-2026, Seqera Labs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE(review): presumably a size limit expressed in bytes (10,000,000) - confirm against the code reading this file
max_copy_size = 10000000
# 5242880 = 5 * 1024 * 1024; presumably a chunk size in bytes (5 MiB) - confirm against the code reading this file
upload_chunk_size = 5242880

View File

@@ -0,0 +1,31 @@
<!--
~ Copyright 2013-2026, Seqera Labs
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<configuration>
<!-- Console appender; each line shows time, thread, level, logger name (abbreviated to 36 chars) and message -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} %m%n</pattern>
</encoder>
</appender>
<!-- Per-package levels: these loggers are capped at INFO while the root logger below is set to DEBUG -->
<logger name="org.apache.http" level="INFO" />
<logger name="software.amazon" level="INFO" />
<logger name="com.upplication" level="DEBUG" />
<!-- Everything else logs at DEBUG to the console appender -->
<root level="DEBUG">
<appender-ref ref="STDOUT"/>
</root>
</configuration>

View File

@@ -0,0 +1,85 @@
# Microsoft Azure plugin for Nextflow
## Summary
The Microsoft Azure plugin provides support for Azure Blob Storage as a file system, and Azure Batch as a compute executor for Nextflow pipelines.
## Get Started
To use this plugin, add it to your `nextflow.config`:
```groovy
plugins {
id 'nf-azure'
}
```
Configure your Azure credentials and services:
```groovy
azure {
storage {
accountName = '<YOUR STORAGE ACCOUNT NAME>'
accountKey = '<YOUR STORAGE ACCOUNT KEY>'
}
batch {
endpoint = 'https://<YOUR BATCH ACCOUNT NAME>.<REGION>.batch.azure.com'
accountName = '<YOUR BATCH ACCOUNT NAME>'
accountKey = '<YOUR BATCH ACCOUNT KEY>'
}
}
```
Set the executor and work directory:
```groovy
process.executor = 'azurebatch'
workDir = 'az://<YOUR CONTAINER>/work'
```
## Examples
### Basic Azure Batch Configuration
```groovy
plugins {
id 'nf-azure'
}
azure {
storage {
accountName = 'mystorageaccount'
accountKey = System.getenv('AZURE_STORAGE_KEY')
}
batch {
endpoint = 'https://mybatchaccount.westeurope.batch.azure.com'
accountName = 'mybatchaccount'
accountKey = System.getenv('AZURE_BATCH_KEY')
autoPoolMode = true
deletePoolsOnCompletion = true
}
}
process.executor = 'azurebatch'
workDir = 'az://mycontainer/work'
```
### Using Managed Identity
```groovy
azure {
managedIdentity {
clientId = '<YOUR MANAGED IDENTITY CLIENT ID>'
}
}
```
## Resources
- [Azure Batch Executor Documentation](https://nextflow.io/docs/latest/azure.html)
## License
[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)

View File

@@ -0,0 +1 @@
1.22.2

View File

@@ -0,0 +1,116 @@
[
{
"cloudName": "AzureCloud",
"homeTenantId": "7005851f-400b-4acb-8bc1-12c44a7d39e5",
"id": "cb4ff255-ac8c-4721-83bd-2d98e75b50d7",
"isDefault": true,
"managedByTenants": [],
"name": "Free Trial",
"state": "Enabled",
"tenantId": "7005851f-400b-4acb-8bc1-12c44a7d39e5",
"user": {
"name": "paolo@seqera.io",
"type": "user"
}
}
]
$ az login
$ az group create --name my-resource-group --location westeurope
$ az storage account create --resource-group my-resource-group --name nfaccount --location westeurope
{- Finished ..
"accessTier": "Hot",
"azureFilesIdentityBasedAuthentication": null,
"blobRestoreStatus": null,
"creationTime": "2020-05-15T20:42:17.206927+00:00",
"customDomain": null,
"enableHttpsTrafficOnly": true,
"encryption": {
"keySource": "Microsoft.Storage",
"keyVaultProperties": null,
"services": {
"blob": {
"enabled": true,
"keyType": "Account",
"lastEnabledTime": "2020-05-15T20:42:17.300678+00:00"
},
"file": {
"enabled": true,
"keyType": "Account",
"lastEnabledTime": "2020-05-15T20:42:17.300678+00:00"
},
"queue": null,
"table": null
}
},
"failoverInProgress": null,
"geoReplicationStats": null,
"id": "/subscriptions/cb4ff255-ac8c-4721-83bd-2d98e75b50d7/resourceGroups/my-resource-group/providers/Microsoft.Storage/storageAccounts/nfaccount",
"identity": null,
"isHnsEnabled": null,
"kind": "StorageV2",
"largeFileSharesState": null,
"lastGeoFailoverTime": null,
"location": "westeurope",
"name": "nfaccount",
"networkRuleSet": {
"bypass": "AzureServices",
"defaultAction": "Allow",
"ipRules": [],
"virtualNetworkRules": []
},
"primaryEndpoints": {
"blob": "https://nfaccount.blob.core.windows.net/",
"dfs": "https://nfaccount.dfs.core.windows.net/",
"file": "https://nfaccount.file.core.windows.net/",
"internetEndpoints": null,
"microsoftEndpoints": null,
"queue": "https://nfaccount.queue.core.windows.net/",
"table": "https://nfaccount.table.core.windows.net/",
"web": "https://nfaccount.z6.web.core.windows.net/"
},
"primaryLocation": "westeurope",
"privateEndpointConnections": [],
"provisioningState": "Succeeded",
"resourceGroup": "my-resource-group",
"routingPreference": null,
"secondaryEndpoints": {
"blob": "https://nfaccount-secondary.blob.core.windows.net/",
"dfs": "https://nfaccount-secondary.dfs.core.windows.net/",
"file": null,
"internetEndpoints": null,
"microsoftEndpoints": null,
"queue": "https://nfaccount-secondary.queue.core.windows.net/",
"table": "https://nfaccount-secondary.table.core.windows.net/",
"web": "https://nfaccount-secondary.z6.web.core.windows.net/"
},
"secondaryLocation": "northeurope",
"sku": {
"name": "Standard_RAGRS",
"tier": "Standard"
},
"statusOfPrimary": "available",
"statusOfSecondary": "available",
"tags": {},
"type": "Microsoft.Storage/storageAccounts"
}
Connection string
BlobEndpoint=https://nfaccount.blob.core.windows.net/;QueueEndpoint=https://nfaccount.queue.core.windows.net/;FileEndpoint=https://nfaccount.file.core.windows.net/;TableEndpoint=https://nfaccount.table.core.windows.net/;SharedAccessSignature=sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
SAS token
?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
Blob service SAS URL
https://nfaccount.blob.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
File service SAS URL
https://nfaccount.file.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
Queue service SAS URL
https://nfaccount.queue.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D
Table service SAS URL
https://nfaccount.table.core.windows.net/?sv=2019-10-10&ss=bfqt&srt=sco&sp=rwdlacupx&se=2020-05-16T04:48:12Z&st=2020-05-15T20:48:12Z&spr=https&sig=9xCn8O%2FxjKroc7YOc9fHffiNOtRaY46spv9VJa4D8pU%3D

View File

@@ -0,0 +1,104 @@
» az login
» az group create --name nf-storage-group --location westeurope
{
"id": "/subscriptions/f7ef67b9-51f5-4fc2-91a8-0f9cce0c6598/resourceGroups/nf-storage-group",
"location": "westeurope",
"managedBy": null,
"name": "nf-storage-group",
"properties": {
"provisioningState": "Succeeded"
},
"tags": null,
"type": "Microsoft.Resources/resourceGroups"
}
» az storage account create --resource-group nf-storage-group --name nfstore --location westeurope
{
"accessTier": "Hot",
"allowBlobPublicAccess": null,
"azureFilesIdentityBasedAuthentication": null,
"blobRestoreStatus": null,
"creationTime": "2020-07-18T07:52:22.585318+00:00",
"customDomain": null,
"enableHttpsTrafficOnly": true,
"encryption": {
"keySource": "Microsoft.Storage",
"keyVaultProperties": null,
"requireInfrastructureEncryption": null,
"services": {
"blob": {
"enabled": true,
"keyType": "Account",
"lastEnabledTime": "2020-07-18T07:52:22.679222+00:00"
},
"file": {
"enabled": true,
"keyType": "Account",
"lastEnabledTime": "2020-07-18T07:52:22.679222+00:00"
},
"queue": null,
"table": null
}
},
"failoverInProgress": null,
"geoReplicationStats": null,
"id": "/subscriptions/f7ef67b9-51f5-4fc2-91a8-0f9cce0c6598/resourceGroups/nf-storage-group/providers/Microsoft.Storage/storageAccounts/nfstore",
"identity": null,
"isHnsEnabled": null,
"kind": "StorageV2",
"largeFileSharesState": null,
"lastGeoFailoverTime": null,
"location": "westeurope",
"minimumTlsVersion": null,
"name": "nfstore",
"networkRuleSet": {
"bypass": "AzureServices",
"defaultAction": "Allow",
"ipRules": [],
"virtualNetworkRules": []
},
"primaryEndpoints": {
"blob": "https://nfstore.blob.core.windows.net/",
"dfs": "https://nfstore.dfs.core.windows.net/",
"file": "https://nfstore.file.core.windows.net/",
"internetEndpoints": null,
"microsoftEndpoints": null,
"queue": "https://nfstore.queue.core.windows.net/",
"table": "https://nfstore.table.core.windows.net/",
"web": "https://nfstore.z6.web.core.windows.net/"
},
"primaryLocation": "westeurope",
"privateEndpointConnections": [],
"provisioningState": "Succeeded",
"resourceGroup": "nf-storage-group",
"routingPreference": null,
"secondaryEndpoints": {
"blob": "https://nfstore-secondary.blob.core.windows.net/",
"dfs": "https://nfstore-secondary.dfs.core.windows.net/",
"file": null,
"internetEndpoints": null,
"microsoftEndpoints": null,
"queue": "https://nfstore-secondary.queue.core.windows.net/",
"table": "https://nfstore-secondary.table.core.windows.net/",
"web": "https://nfstore-secondary.z6.web.core.windows.net/"
},
"secondaryLocation": "northeurope",
"sku": {
"name": "Standard_RAGRS",
"tier": "Standard"
},
"statusOfPrimary": "available",
"statusOfSecondary": "available",
"tags": {},
"type": "Microsoft.Storage/storageAccounts"
}
az storage blob generate-sas \
--account-name nfstore \
--container-name my-data \
--name MyBlob \
--permissions racdw \
--expiry 2021-06-15

View File

@@ -0,0 +1,74 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Gradle plugins applied to this build.
plugins {
// Nextflow plugin build support; the version is read from the `nextflowPluginVersion`
// property, which is not defined in this file (presumably gradle.properties - TODO confirm).
id 'io.nextflow.nextflow-plugin' version "${nextflowPluginVersion}"
// Gradle test-fixtures support; the dependencies block below consumes testFixtures(project(":nextflow")).
id 'java-test-fixtures'
}
// Descriptor settings consumed by the `io.nextflow.nextflow-plugin` Gradle plugin.
nextflowPlugin {
// Nextflow version this plugin is declared against
// (presumably the minimum required runtime version - TODO confirm against the Gradle plugin docs).
nextflowVersion = '25.11.0-edge'
// Provider name is read from the `nextflowPluginProvider` property, which is not defined in this file.
provider = "${nextflowPluginProvider}"
description = 'Enables Azure cloud execution through Batch service with native Blob storage access and comprehensive authentication options'
// Fully qualified name of the plugin class.
className = 'nextflow.cloud.azure.AzurePlugin'
// NOTE(review): with default dependencies turned off, every dependency is expected to be
// declared explicitly in the `dependencies` block of this file - confirm.
useDefaultDependencies = false
generateSpec = false
// Fully qualified class names declared as the extension points of this plugin.
extensionPoints = [
'nextflow.cloud.azure.batch.AzBatchExecutor',
'nextflow.cloud.azure.config.AzConfig',
'nextflow.cloud.azure.file.AzPathFactory',
'nextflow.cloud.azure.file.AzPathSerializer',
'nextflow.cloud.azure.fusion.AzFusionEnv',
]
}
// Non-standard source layout: Groovy sources live directly under src/main and src/test,
// resources under src/resources and src/testResources. No Java source directories are used.
sourceSets {
    main.java.srcDirs = []
    main.groovy.srcDirs = ['src/main']
    main.resources.srcDirs = ['src/resources']
    test.groovy.srcDirs = ['src/test']
    // src/testResources holds test resources, not Java sources: registering it as a Java
    // source dir would keep its files off the test classpath as resources.
    test.java.srcDirs = []
    test.resources.srcDirs = ['src/testResources']
}
// Configuration-wide exclusions applied to the runtime classpath.
configurations {
// see https://docs.gradle.org/4.1/userguide/dependency_management.html#sub:exclude_transitive_dependencies
// Keep slf4j-api off the runtime classpath; it is only declared `compileOnly` in the
// dependencies block (presumably because the host Nextflow runtime supplies it - TODO confirm).
runtimeClasspath.exclude group: 'org.slf4j', module: 'slf4j-api'
}
// Dependencies of the nf-azure plugin.
dependencies {
// Needed at compile time only; not added to the plugin's runtime classpath
// (presumably provided by the host Nextflow runtime - TODO confirm).
compileOnly project(':nextflow')
compileOnly 'org.slf4j:slf4j-api:2.0.17'
compileOnly 'org.pf4j:pf4j:3.14.1'
// Azure SDK libraries exposed as `api` dependencies; the transitive slf4j-api is excluded from each.
api('com.azure:azure-storage-blob:12.33.2') {
exclude group: 'org.slf4j', module: 'slf4j-api'
}
// The Batch client additionally drops its transitive Guava dependency.
api('com.azure:azure-compute-batch:1.0.0-beta.3') {
exclude group: 'org.slf4j', module: 'slf4j-api'
exclude group: 'com.google.guava', module: 'guava'
}
api('com.azure:azure-identity:1.18.2') {
exclude group: 'org.slf4j', module: 'slf4j-api'
}
// Force patched version to address GHSA-72hv-8253-57qq (jackson-core Number Length Constraint Bypass DoS)
runtimeOnly 'com.fasterxml.jackson.core:jackson-core:2.18.6'
// Test-only dependencies: the core project, its test fixtures, and the Groovy runtime modules.
testImplementation(testFixtures(project(":nextflow")))
testImplementation project(':nextflow')
testImplementation "org.apache.groovy:groovy:4.0.31"
testImplementation "org.apache.groovy:groovy-nio:4.0.31"
}

View File

@@ -0,0 +1,254 @@
nf-azure changelog
===================
1.22.2 - 26 Mar 2026
- Fix netty and jackson vulnerabilities (#6955) [8dafdd95d]
- Fix security vulnerabilities (#6938) [8b1ab40c4]
1.22.1 - 17 Mar 2026
- Record types (#6679) [d54ff29af]
1.22.0 - 28 Feb 2026
- Handle Azure Batch ActiveJobAndScheduleQuotaReached with retry (#6874) [6e66aaa58]
- Update default Azure Batch VM image to Ubuntu 24.04 (#6844) [b621fc7cb]
1.21.0 - 28 Nov 2025
- Optimize exit code handling by relying on scheduler status for successful executions (#6484) [454a2ae85]
1.20.2 - 21 Oct 2025
- Rename `config.schema` package to `config.spec` (#6485) [ef0d2d601]
1.20.1 - 8 Oct 2025
- Fix unstage controls in command.run when using storeDir (#6364) [a5756da3e]
1.19.0 - 15 Aug 2025
- Fix Azure Batch startTask concatenation issue (#6300) (#6305) [ci fast] [81d5c0dc]
- Unify nf-lang config scopes with runtime classes (#6271) [bfa67ca3]
- Update Azure and AWS deps (#6343) [ci fast] [ff00e2de]
- Bump groovy 4.0.28 (#6304) [ci fast] [a468f8ef]
- Bump netty-codec-http2:4.1.124.Final [7e690b44]
1.18.0 - 6 Jun 2025
- Allow users to provide implicit managed identity to Azure Batch (#6144) [d1f70f50]
- Minor Azure Batch disk slot calculation demoted to debug (#6234) [ci skip] [c65955ce]
- Bump Slf4j version 2.0.17 [93199e09]
1.17.0 - 2 Jun 2025
- Add support for Azure Managed identities on Azure worker nodes with Fusion (#6118) [37981a5f]
- Bump Groovy to version 4.0.27 (#6125) [258e1790]
1.16.0 - 8 May 2025
- Add azure.batch.jobMaxWallClockTime config option (#5996) [74963fdc]
- Remove test constructors or mark as TestOnly (#5216) [d4fadd42]
1.15.0 - 23 Apr 2025
- Update Azure Batch VM sizes and regions (#5985) [297150b8]
1.14.1 - 19 Mar 2025
- Fix handling of exit status with Azure Batch and Fusion (#5806) [7085862d]
- Removing Azure vmList from log [67ffc8ab]
1.14.0 - 17 Mar 2025
- Add cpu-shares and memory limits to Azure Batch tasks (#5799) [f9c0cbfd]
- Add disk directive support in Azure Batch (#5784) [113d7250]
- Validates Azure region before checking available VMs (#5108) [080893a2]
- Fix Ignore Azure pool already exists error (#5721) [e267961b]
- Bump Ubuntu 22.04 as default SKU for Azure Batch (#5804) [e0ba536d]
- Bump groovy 4.0.26 [f740bc56]
1.13.0 - 12 Feb 2025
- Allow Azure Batch tasks to be submitted to different pools (#5766) [76790d2a]
- Fix Check for number of low priority nodes in Azure Batch before raising a pool resize error (#5576) [9b528c11]
- Update azure deps [b163da95]
- Bump groovy 4.0.25 [19c40a4a]
- Bump io.netty:netty-handler:4.1.118.Final [db4a9037]
- Bump net.minidev:json-smart:2.5.2 [b5c4faf4]
- Bump netty-common:4.1.118.Final [8574e243]
1.12.0 - 20 Jan 2025
- Ensure job is killed when exception in task status check (#5561) [9eefd207]
- Bump logback 1.5.13 + slf4j 2.0.16 [cc0163ac]
- Bump groovy 4.0.24 missing deps [40670f7e]
1.11.0 - 3 Dec 2024
- Detecting errors in data unstaging (#5345) [3c8e602d]
- Bump netty-common to version 4.1.115.Final [d1bbd3d0]
- Bump groovy 4.0.24 [dd71ad31]
- Bump com.azure:azure-identity from 1.11.3 to 1.12.2 (#5449) [cb70f1df]
- Target Java 17 as minimal Java version (#5045) [0140f954]
1.10.1 - 27 Oct 2024
- Demote azure batch task status log level to trace (#5416) [ci skip] [d6c684bb]
1.10.0 - 2 Oct 2024
- Fix Azure Fusion env misses credentials when no key or SAS provided (#5328) [e11382c8]
- Bump groovy 4.0.23 (#5303) [fe3e3ac7]
1.9.0 - 4 Sep 2024
- Support Azure Managed Identities in Fusion configuration logic (#5278) [a0bf8b40]
1.8.1 - 5 Aug 2024
- Bump pf4j to version 3.12.0 [96117b9a]
1.8.0 - 8 Jul 2024
- Update Azure VMs and regions for 2024-07-01 (#5100) [12b027ee]
- Add retry options to Azure Blob client (#5098) [7d5e5d2b]
- Bump groovy 4.0.22 [284a6606]
1.7.0 - 17 Jun 2024
- Add support for Azure managed identity (#4897) [21ca16e6]
- Fix Azure system-assigned managed identity [a639a17d]
- Fix support for Azure managed identity clientId [306814e7]
- Bump azure-compute-batch:1.0.0-beta.2 [c08dc49b]
- Bump azure-storage-blob 12.26.1 [c76ff5e7]
1.6.1 - 1 Aug 2024
- Update Azure batch deps [72576648]
- Bump pf4j to version 3.12.0 [1a8f086a]
1.6.0 - 15 Apr 2024
- Add support for Azure custom startTask (#4913) [27d01e3a]
- Improve control on azcopy install (#4883) [01447d5c]
- Fix Azure pool creation [2ee4d11e]
- Bump groovy 4.0.21 [9e08390b]
1.5.1 - 10 Mar 2024
- Update Azure dependencies [1bcbaf0d]
- Bump groovy 4.0.19 [854dc1f0]
1.5.0 - 5 Feb 2024
- Fix azure retry policy (#4638) [85bab699]
- Use AZURE_STORAGE_SAS_TOKEN environment variable (#4627) [2e02afbf]
- Bump Groovy 4 (#4443) [9d32503b]
1.4.0 - 24 Nov 2023
- Fix security vulnerabilities (#4513) [a310c777]
- Add support for Azure low-priority pool (#4527) [8320ea10]
1.3.3-patch3 - 31 Jul 2024
- Update Azure batch deps [e0c6d77d]
1.3.3-patch2 - 11 Jun 2024
- Fix security vulnerabilities (#5057) [6d8765b8]
1.3.3-patch1 - 28 May 2024
- Bump dependency with Nextflow 23.10.2
1.3.3 - 12 Jan 2024
- Use AZURE_STORAGE_SAS_TOKEN environment variable (#4627) [2e1cb413]
- Fix azure retry policy (#4638) [2bc3cf0e]
1.3.2 - 28 Sep 2023
- Retry TimeoutException in azure file system (#4295) [79248355]
1.3.1 - 10 Sep 2023
- Disable staging script for remote work dir (#4282) [80f7cd46]
- Fix IOException should be thrown when failing to create Azure directory [b0bdfd79]
- Fix security deps in nf-azure plugin [c30d5211]
- Bump groovy 3.0.19 [cb411208]
1.3.0 - 17 Aug 2023
- Add resource labels support for Azure Batch (#4178) [7b5e50a1]
- Fix typos in source code comments (#4173) [e78bc37e]
1.2.0 - 5 Aug 2023
- Add deleteTasksOnCompletion to Azure Batch configuration (#4114) [b14674dc]
1.1.4 - 22 Jul 2023
- Fix failing test [9a52f848]
- Fix Improve error message for invalid Azure URI [0f4d8867]
- Fix invalid detection of hierarchical namespace stub blobs as files (#4046) [ce06c877]
- Wait for all child processes in nxf_parallel (#4050) [60a5f1a7]
- Bump Groovy 3.0.18 [207eb535]
1.1.3 - 19 Jun 2023
- Increase Azure min retry delay to 250ms [2e77e5e4]
- Fix AzFileSystem retry policy (2) [c2f3cc96]
1.1.2 - 19 Jun 2023
- Fix AzFileSystem retry policy [ba9b6d18]
- Improve Azure retry logging [de58697a]
1.1.1 - 14 Jun 2023
- Add retry policy on Az blob operations [295bc1ff]
- Bump azure-storage-blob:12.22.1 [2a36fa77]
- Fix S3 path normalization [b75ec444]
1.1.0 - 15 May 2023
- Add support for `time` directive in Azure Batch (#3869) [5c11a0d4]
- Increase Azure default maxRetries to 10 [a017139f]
- Fix Azure jobs correctly deleted after completion (#3927) [b173a983]
- Fix missing SAS token fusion env for Azure [43015029]
- Fix failing tests [06337962]
- Fix Azure pool creation when using scaling formula (#3868) [79984a87]
- Security fixes [973b7bea]
- Update logging libraries [d7eae86e]
- Bump groovy 3.0.17 [cfe4ba56]
1.0.1 - 15 Apr 2023
- Security fixes [83e8fd6a]
- Fix Azure pool creation when using scaling formula (#3868) [84a808a5]
1.0.0 - 1 Apr 2023
- Add support for Fusion to Azure Batch executor (#3209) [3d3cbfa2]
- Fix NoSuchMethodError String.stripIndent with Java 11 [308eafe6]
0.16.0 - 19 Mar 2023
- Add azure batch pool virtualNetwork option (#3723) [e3917b8e]
- Update Azure VM sizes (#3751) [1d06e9a6]
- Increase pwd obfuscation min length [ba23d036]
- Bump groovy 3.0.16 [d3ff5dcb]
0.15.1 - 14 Jan 2023
- Improve container native executor configuration [03126371]
- Minor logging change [646776a8]
- Bump groovy 3.0.14 [7c204236]
0.15.0 - 23 Nov 2022
- Allow identity based authentication on Azure Batch (#3132) [a08611be]
- Add Azure SAS token validation [e2244b48]
0.14.1 - 10 Sep 2022
- Fix Azure NPE on missing pool opts [d5c0aabd]
- Fix shutdown/cleanup hooks invocation [f4185070]
0.14.0 - 7 Sep 2022
- Fix thread pool race condition on shutdown [8d2b0587]
- Update Azure vm types [80f5fbe4]
0.13.5 - 1 Sep 2022
- Get rid of remote bin dir [6cfb51e7]
- Fix typos in log messages [76a87c72]
- Improve Az Batch err handling and testing [85d31e8d]
0.13.4 - 1 Aug 2022
- Add retry when Azure submit fails with OperationTimedOut [6a3f9742]
0.13.3 - 13 Jul 2022
- Fix escape unstage outputs with double quotes #2912 #2904 #2790 [49ff02a6]
0.13.2 - 15 May 2022
- Update default SKU for Azure Batch 'batch.node.ubuntu 20.04' [be60fc14]
0.13.1 - 2 Apr 2022
- Add retry policy Azure create job [792820a2]
0.13.0 - 27 Mar 2022
- Add azcopy fine grain config settings [3998a56b]
- Add retry policy to Az Batch operations [991c6175]
0.12.0 - 6 Feb 2022
- Generate "account" token instead of container token when SAS token is not provided [d5125975d]
- Fix upload of nested directory outputs on azure [85ad55225] [744447155]
0.11.2 - 22 Nov 2021
- Fix Azure executor fail to cleanup jobs on completion [533448be4]
- Make Azure executor logging less verbose [e0b2117ad]
0.11.1 - 18 Nov 2021
- Fix NPE when pool is not configured and auto pool mode is not specified
0.11.0 - 12 Oct 2021
- Add Azure pool node SKU selection #2360 [9afcac756]
- Add Built-in support for Azure File Shares #2285 [a4c3e0ad5]
- Add missing information for pulling images from private registry in Azure Batch #2355 [040e190bd]

View File

@@ -0,0 +1,42 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.cloud.azure
import groovy.transform.CompileStatic
import nextflow.cloud.azure.nio.AzFileSystemProvider
import nextflow.file.FileHelper
import nextflow.plugin.BasePlugin
import org.pf4j.PluginWrapper
/**
 * Nextflow plugin entry point for Azure cloud support.
 *
 * On start-up it registers the Azure file system provider via {@link FileHelper}.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
@CompileStatic
class AzurePlugin extends BasePlugin {

    /**
     * Creates the plugin instance.
     *
     * @param wrapper the pf4j plugin wrapper, passed straight to {@link BasePlugin}
     */
    AzurePlugin(PluginWrapper wrapper) {
        super(wrapper)
    }

    /**
     * Runs the base plugin start-up, then gets or installs the
     * {@link AzFileSystemProvider} so the Azure file system is registered.
     */
    @Override
    void start() {
        super.start()
        FileHelper.getOrInstallProvider(AzFileSystemProvider.class)
    }

}

Some files were not shown because too many files have changed in this diff Show More