Commit 93e8ed4
fix: update BlobReadChannelV2 handling to correctly restart for decompressed object (#1867)
* fix: update BlobReadChannelV2 handling to correctly restart for decompressed object

  When downloading bytes from GCS and decompressing them, a restart of the stream needs to pick up from the offset within the compressed bytes, not the decompressed bytes. Prior to this change, http-client was automatically applying gzip decompression to the stream it returns to us, throwing our offset tracking off. This change updates our interaction with http-client to always request the raw bytes with no transform applied; at a higher level we can then decide whether gzip decompression needs to be plumbed in.

  Also fix up a couple of channel closed-state subtleties when buffering is used with decompression, and add a new test leveraging the testbench which forces a broken stream on some compressed object bytes.

  NOTE: This test depends on the next release of testbench and will be failing until we get that release.

* deps(test): update testbench to v0.33.0
1 parent 2b94567 commit 93e8ed4

10 files changed: +150 −42 lines
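Before the per-file diffs, here is a minimal, self-contained sketch of the offset bookkeeping the commit message describes. This is not the library's implementation; openRawObjectStream() in the trailing usage comment is a hypothetical helper. The point is that counting bytes on the raw stream, before GZIPInputStream, yields a restart offset expressed in compressed bytes.

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

/** Counts how many raw (compressed) bytes have been consumed from the wrapped stream. */
final class CountingInputStream extends FilterInputStream {
  private long count;

  CountingInputStream(InputStream in) {
    super(in);
  }

  long getCount() {
    return count;
  }

  @Override
  public int read() throws IOException {
    int b = super.read();
    if (b != -1) {
      count++;
    }
    return b;
  }

  @Override
  public int read(byte[] b, int off, int len) throws IOException {
    int n = super.read(b, off, len);
    if (n > 0) {
      count += n;
    }
    return n;
  }
}

// Usage sketch: wrap the raw stream *before* GZIPInputStream so getCount() is an offset
// into the compressed bytes (note GZIPInputStream reads ahead, so the count reflects bytes
// pulled from the source, not bytes fully decoded). If the http layer had already gunzipped
// the stream, the count would be in decompressed bytes and a Range restart against the
// compressed object would land in the wrong place.
//
//   InputStream raw = openRawObjectStream();        // hypothetical raw-bytes source
//   CountingInputStream counting = new CountingInputStream(raw);
//   InputStream data = new GZIPInputStream(counting);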

google-cloud-storage/src/main/java/com/google/cloud/storage/ApiaryUnbufferedReadableByteChannel.java

Lines changed: 21 additions & 24 deletions
@@ -17,6 +17,7 @@
 package com.google.cloud.storage;
 
 import static com.google.cloud.storage.Utils.ifNonNull;
+import static java.util.Objects.requireNonNull;
 
 import com.google.api.client.http.HttpHeaders;
 import com.google.api.client.http.HttpResponse;
@@ -49,6 +50,7 @@
 import java.nio.channels.ReadableByteChannel;
 import java.nio.channels.ScatteringByteChannel;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.function.Function;
 import javax.annotation.concurrent.Immutable;
@@ -82,13 +84,9 @@ class ApiaryUnbufferedReadableByteChannel implements UnbufferedReadableByteChannel
     this.options = options;
     this.resultRetryAlgorithm = resultRetryAlgorithm;
     this.open = true;
-    this.position =
-        apiaryReadRequest.getByteRangeSpec() != null
-            ? apiaryReadRequest.getByteRangeSpec().beginOffset()
-            : 0;
+    this.position = apiaryReadRequest.getByteRangeSpec().beginOffset();
   }
 
-  @SuppressWarnings("UnnecessaryContinue")
   @Override
   public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
     do {
@@ -113,12 +111,10 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
           // if our retry algorithm COULD allow a retry, continue the loop and allow trying to
           // open the stream again.
           sbc = null;
-          continue;
         } else if (t instanceof IOException) {
           IOException ioE = (IOException) t;
           if (resultRetryAlgorithm.shouldRetry(StorageException.translate(ioE), null)) {
             sbc = null;
-            continue;
           } else {
             throw ioE;
           }
@@ -148,11 +144,8 @@ private void setXGoogGeneration(long xGoogGeneration) {
 
   private ScatteringByteChannel open() {
     try {
-      Boolean b =
-          (Boolean) apiaryReadRequest.options.get(StorageRpc.Option.RETURN_RAW_INPUT_STREAM);
-      boolean returnRawInputStream = b != null ? b : true;
       ApiaryReadRequest request = apiaryReadRequest.withNewBeginOffset(position);
-      Get get = createGetRequest(request, storage.objects(), xGoogGeneration, returnRawInputStream);
+      Get get = createGetRequest(request, storage.objects(), xGoogGeneration);
 
       HttpResponse media = get.executeMedia();
       InputStream content = media.getContent();
@@ -215,10 +208,7 @@ private ScatteringByteChannel open() {
 
   @VisibleForTesting
   static Get createGetRequest(
-      ApiaryReadRequest apiaryReadRequest,
-      Objects objects,
-      Long xGoogGeneration,
-      boolean returnRawInputStream)
+      ApiaryReadRequest apiaryReadRequest, Objects objects, Long xGoogGeneration)
       throws IOException {
     StorageObject from = apiaryReadRequest.getObject();
     Map<StorageRpc.Option, ?> options = apiaryReadRequest.getOptions();
@@ -262,7 +252,9 @@ static Get createGetRequest(
               base64.encode(hashFunction.hashBytes(base64.decode(key)).asBytes()));
         });
 
-    get.setReturnRawInputStream(returnRawInputStream);
+    // gzip handling is performed upstream of here. Ensure we always get the raw input stream from
+    // the request
+    get.setReturnRawInputStream(true);
     String range = apiaryReadRequest.getByteRangeSpec().getHttpRangeHeader();
     if (range != null) {
       get.getRequestHeaders().setRange(range);
@@ -288,7 +280,7 @@ private static String getHeaderValue(@NonNull HttpHeaders headers, @NonNull String
       if (list.isEmpty()) {
         return null;
       } else {
-        return list.get(0);
+        return list.get(0).trim().toLowerCase(Locale.ENGLISH);
      }
     } else if (o instanceof String) {
       return (String) o;
@@ -303,27 +295,32 @@ private static String getHeaderValue(@NonNull HttpHeaders headers, @NonNull String
   static final class ApiaryReadRequest implements Serializable {
     private static final long serialVersionUID = -4059435314115374448L;
     private static final Gson gson = new Gson();
-    private transient StorageObject object;
-    private final Map<StorageRpc.Option, ?> options;
-    private final ByteRangeSpec byteRangeSpec;
+    @NonNull private transient StorageObject object;
+    @NonNull private final Map<StorageRpc.Option, ?> options;
+    @NonNull private final ByteRangeSpec byteRangeSpec;
 
     private volatile String objectJson;
 
     ApiaryReadRequest(
-        StorageObject object, Map<StorageRpc.Option, ?> options, ByteRangeSpec byteRangeSpec) {
-      this.object = object;
-      this.options = options;
-      this.byteRangeSpec = byteRangeSpec;
+        @NonNull StorageObject object,
+        @NonNull Map<StorageRpc.Option, ?> options,
+        @NonNull ByteRangeSpec byteRangeSpec) {
+      this.object = requireNonNull(object, "object must be non null");
+      this.options = requireNonNull(options, "options must be non null");
+      this.byteRangeSpec = requireNonNull(byteRangeSpec, "byteRangeSpec must be non null");
     }
 
+    @NonNull
     StorageObject getObject() {
       return object;
     }
 
+    @NonNull
     Map<StorageRpc.Option, ?> getOptions() {
       return options;
     }
 
+    @NonNull
     ByteRangeSpec getByteRangeSpec() {
       return byteRangeSpec;
     }
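The hunk above centers on always calling setReturnRawInputStream(true). Below is a hedged sketch of that interaction with the generated Apiary client, built from the same calls the diff uses; the bucket/object names are placeholders and `storage` is assumed to be an already constructed, authenticated client.

import com.google.api.client.http.HttpResponse;
import com.google.api.services.storage.Storage;
import java.io.IOException;
import java.io.InputStream;

final class RawRangeReadExample {
  static InputStream openRaw(Storage storage, long compressedOffset) throws IOException {
    Storage.Objects.Get get = storage.objects().get("my-bucket", "my-object.txt");
    // Never let the http layer gunzip the body; gzip handling happens above this layer.
    get.setReturnRawInputStream(true);
    // Resume from an offset within the compressed bytes.
    get.getRequestHeaders().setRange("bytes=" + compressedOffset + "-");
    HttpResponse media = get.executeMedia();
    return media.getContent(); // raw, possibly still gzip-compressed bytes
  }
}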

google-cloud-storage/src/main/java/com/google/cloud/storage/BaseStorageReadChannel.java

Lines changed: 2 additions & 2 deletions
@@ -120,7 +120,7 @@ public final synchronized int read(ByteBuffer dst) throws IOException {
   }
 
   @Override
-  public ApiFuture<BlobInfo> getObject() {
+  public final ApiFuture<BlobInfo> getObject() {
     return ApiFutures.transform(result, objectDecoder::decode, MoreExecutors.directExecutor());
   }
 
@@ -136,7 +136,7 @@ protected final int getChunkSize() {
   }
 
   @Nullable
-  protected T getResolvedObject() {
+  protected final T getResolvedObject() {
     if (result.isDone()) {
       return StorageException.wrapFutureGet(result);
     } else {

google-cloud-storage/src/main/java/com/google/cloud/storage/BlobReadChannelV2.java

Lines changed: 8 additions & 1 deletion
@@ -32,6 +32,7 @@ final class BlobReadChannelV2 extends BaseStorageReadChannel<StorageObject> {
   private final StorageObject storageObject;
   private final Map<StorageRpc.Option, ?> opts;
   private final BlobReadChannelContext blobReadChannelContext;
+  private final boolean autoGzipDecompression;
 
   BlobReadChannelV2(
       StorageObject storageObject,
@@ -41,10 +42,15 @@ final class BlobReadChannelV2 extends BaseStorageReadChannel<StorageObject> {
     this.storageObject = storageObject;
     this.opts = opts;
     this.blobReadChannelContext = blobReadChannelContext;
+    this.autoGzipDecompression =
+        // RETURN_RAW_INPUT_STREAM means do not add GZIPInputStream to the pipeline. Meaning, if
+        // RETURN_RAW_INPUT_STREAM is false, automatically attempt to decompress if Content-Encoding
+        // gzip.
+        Boolean.FALSE.equals(opts.get(StorageRpc.Option.RETURN_RAW_INPUT_STREAM));
   }
 
   @Override
-  public RestorableState<ReadChannel> capture() {
+  public synchronized RestorableState<ReadChannel> capture() {
     ApiaryReadRequest apiaryReadRequest = getApiaryReadRequest();
     return new BlobReadChannelV2State(
         apiaryReadRequest, blobReadChannelContext.getStorageOptions(), getChunkSize());
@@ -56,6 +62,7 @@ protected LazyReadChannel<StorageObject> newLazyReadChannel() {
         ResumableMedia.http()
             .read()
            .byteChannel(blobReadChannelContext)
+            .setAutoGzipDecompression(autoGzipDecompression)
            .buffered(getBufferHandle())
            .setApiaryReadRequest(getApiaryReadRequest())
            .build());
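The derived flag above hinges on Boolean.FALSE.equals: only an explicit returnRawInputStream(false) opts into decompression; an absent option or an explicit true leaves the raw bytes untouched. A tiny sketch of the three cases (values chosen purely for illustration):

final class RawInputStreamFlagExample {
  public static void main(String[] args) {
    // Option absent: no auto gzip decompression.
    System.out.println(Boolean.FALSE.equals(null));          // false
    // Caller explicitly asked for raw bytes: no auto gzip decompression.
    System.out.println(Boolean.FALSE.equals(Boolean.TRUE));  // false
    // Caller asked for decompression (returnRawInputStream = false): decompress.
    System.out.println(Boolean.FALSE.equals(Boolean.FALSE)); // true
  }
}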

google-cloud-storage/src/main/java/com/google/cloud/storage/DefaultBufferedReadableByteChannel.java

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ public int read(ByteBuffer dst) throws IOException {
     if (retEOF) {
       retEOF = false;
       return -1;
-    } else if (!channel.isOpen()) {
+    } else if (!enqueuedBytes() && !channel.isOpen()) {
       throw new ClosedChannelException();
     }
 
@@ -133,7 +133,7 @@ public int read(ByteBuffer dst) throws IOException {
 
   @Override
   public boolean isOpen() {
-    return !retEOF && channel.isOpen();
+    return enqueuedBytes() || (!retEOF && channel.isOpen());
   }
 
   @Override
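These two lines capture the closed-state subtlety the commit message mentions: a buffered reader should keep serving bytes it has already buffered even after the underlying channel is closed, and only report closed once that buffer is drained. A minimal sketch of the rule against a plain ReadableByteChannel (not the library's DefaultBufferedReadableByteChannel):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.ReadableByteChannel;

final class DrainThenCloseChannel implements ReadableByteChannel {
  private final ByteBuffer buffered; // bytes already pulled from the delegate
  private final ReadableByteChannel delegate;

  DrainThenCloseChannel(ByteBuffer buffered, ReadableByteChannel delegate) {
    this.buffered = buffered;
    this.delegate = delegate;
  }

  @Override
  public int read(ByteBuffer dst) throws IOException {
    if (buffered.hasRemaining()) {
      // Serve buffered bytes even if the delegate has already been closed.
      int n = Math.min(buffered.remaining(), dst.remaining());
      ByteBuffer slice = buffered.duplicate();
      slice.limit(slice.position() + n);
      dst.put(slice);
      buffered.position(buffered.position() + n);
      return n;
    }
    if (!delegate.isOpen()) {
      throw new ClosedChannelException();
    }
    return delegate.read(dst);
  }

  @Override
  public boolean isOpen() {
    // Still "open" while buffered bytes remain, mirroring the isOpen() change above.
    return buffered.hasRemaining() || delegate.isOpen();
  }

  @Override
  public void close() throws IOException {
    delegate.close();
  }
}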

google-cloud-storage/src/main/java/com/google/cloud/storage/GapicDownloadSessionBuilder.java

Lines changed: 4 additions & 1 deletion
@@ -23,6 +23,7 @@
 import com.google.api.gax.rpc.ServerStreamingCallable;
 import com.google.cloud.storage.BufferedReadableByteChannelSession.BufferedReadableByteChannel;
 import com.google.cloud.storage.UnbufferedReadableByteChannelSession.UnbufferedReadableByteChannel;
+import com.google.common.util.concurrent.MoreExecutors;
 import com.google.storage.v2.Object;
 import com.google.storage.v2.ReadObjectRequest;
 import com.google.storage.v2.ReadObjectResponse;
@@ -99,7 +100,9 @@ public UnbufferedReadableByteChannelSessionBuilder unbuffered() {
       return (object, resultFuture) -> {
         if (autoGzipDecompression) {
           return new GzipReadableByteChannel(
-              new GapicUnbufferedReadableByteChannel(resultFuture, read, object, hasher));
+              new GapicUnbufferedReadableByteChannel(resultFuture, read, object, hasher),
+              ApiFutures.transform(
+                  resultFuture, Object::getContentEncoding, MoreExecutors.directExecutor()));
         } else {
           return new GapicUnbufferedReadableByteChannel(resultFuture, read, object, hasher);
         }
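The added ApiFutures.transform call derives a future of just the content encoding from the future object metadata, so the gzip decision can be made without threading the whole metadata object through. A small hedged sketch of that transform in isolation (class and method names are illustrative only):

import com.google.api.core.ApiFuture;
import com.google.api.core.ApiFutures;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.storage.v2.Object;

final class ContentEncodingFutureExample {
  // Given a future of the gRPC object metadata, expose only its contentEncoding field.
  static ApiFuture<String> contentEncodingOf(ApiFuture<Object> metadata) {
    return ApiFutures.transform(
        metadata, Object::getContentEncoding, MoreExecutors.directExecutor());
  }
}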

google-cloud-storage/src/main/java/com/google/cloud/storage/GzipReadableByteChannel.java

Lines changed: 12 additions & 6 deletions
@@ -16,9 +16,13 @@
 
 package com.google.cloud.storage;
 
+import com.google.api.core.ApiFuture;
 import com.google.cloud.storage.UnbufferedReadableByteChannelSession.UnbufferedReadableByteChannel;
-import com.google.storage.v2.Object;
-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.SequenceInputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
@@ -27,13 +31,15 @@
 import java.util.zip.GZIPInputStream;
 
 final class GzipReadableByteChannel implements UnbufferedReadableByteChannel {
-  private final GapicUnbufferedReadableByteChannel source;
+  private final UnbufferedReadableByteChannel source;
+  private final ApiFuture<String> contentEncoding;
 
   private boolean retEOF = false;
   private ScatteringByteChannel delegate;
 
-  GzipReadableByteChannel(GapicUnbufferedReadableByteChannel source) {
+  GzipReadableByteChannel(UnbufferedReadableByteChannel source, ApiFuture<String> contentEncoding) {
     this.source = source;
+    this.contentEncoding = contentEncoding;
   }
 
   @Override
@@ -54,10 +60,10 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
       source.read(wrap);
       try {
         // Step 2: wait for the object metadata, this is populated in the first message from GCS
-        Object object = source.getResult().get();
+        String contentEncoding = this.contentEncoding.get();
         // if the Content-Encoding is gzip, Step 3: wire gzip decompression into the byte path
         // this will have a copy impact as we are no longer controlling all the buffers
-        if ("gzip".equals(object.getContentEncoding())) {
+        if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
           // to wire gzip decompression into the byte path:
           // Create an input stream of the first4 bytes we already read
           ByteArrayInputStream first4again = new ByteArrayInputStream(first4);
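The first4/SequenceInputStream dance above re-joins the four bytes that were sniffed off the stream (while waiting on the content encoding) with the remainder before gzip decoding. A minimal sketch of that re-join using only JDK types; the parameter names are placeholders, not the library's API:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.zip.GZIPInputStream;

final class GzipRejoinExample {
  // first4: the bytes already consumed while deciding whether to decompress.
  // rest: the channel positioned immediately after those bytes.
  static ReadableByteChannel decompressed(byte[] first4, ReadableByteChannel rest)
      throws IOException {
    InputStream first4Again = new ByteArrayInputStream(first4);
    InputStream remainder = Channels.newInputStream(rest);
    InputStream joined = new SequenceInputStream(first4Again, remainder);
    // GZIPInputStream reads the gzip header from the re-joined stream.
    return Channels.newChannel(new GZIPInputStream(joined));
  }
}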

google-cloud-storage/src/main/java/com/google/cloud/storage/HttpDownloadSessionBuilder.java

Lines changed: 25 additions & 2 deletions
@@ -25,6 +25,7 @@
 import com.google.cloud.storage.BlobReadChannelV2.BlobReadChannelContext;
 import com.google.cloud.storage.BufferedReadableByteChannelSession.BufferedReadableByteChannel;
 import com.google.cloud.storage.UnbufferedReadableByteChannelSession.UnbufferedReadableByteChannel;
+import com.google.common.util.concurrent.MoreExecutors;
 import java.nio.ByteBuffer;
 import java.util.function.BiFunction;
 import javax.annotation.concurrent.Immutable;
@@ -51,10 +52,18 @@ public ReadableByteChannelSessionBuilder byteChannel(
   public static final class ReadableByteChannelSessionBuilder {
 
     private final BlobReadChannelContext blobReadChannelContext;
+    private boolean autoGzipDecompression;
     // private Hasher hasher; // TODO: wire in Hasher
 
     private ReadableByteChannelSessionBuilder(BlobReadChannelContext blobReadChannelContext) {
       this.blobReadChannelContext = blobReadChannelContext;
+      this.autoGzipDecompression = false;
+    }
+
+    public ReadableByteChannelSessionBuilder setAutoGzipDecompression(
+        boolean autoGzipDecompression) {
+      this.autoGzipDecompression = autoGzipDecompression;
+      return this;
     }
 
     public BufferedReadableByteChannelSessionBuilder buffered() {
@@ -77,13 +86,27 @@ public UnbufferedReadableByteChannelSessionBuilder unbuffered() {
             ApiaryReadRequest, SettableApiFuture<StorageObject>, UnbufferedReadableByteChannel>
         bindFunction() {
      // for any non-final value, create a reference to the value at this point in time
-      return (request, resultFuture) ->
-          new ApiaryUnbufferedReadableByteChannel(
+      boolean autoGzipDecompression = this.autoGzipDecompression;
+      return (request, resultFuture) -> {
+        if (autoGzipDecompression) {
+          return new GzipReadableByteChannel(
+              new ApiaryUnbufferedReadableByteChannel(
+                  request,
+                  blobReadChannelContext.getApiaryClient(),
+                  resultFuture,
+                  blobReadChannelContext.getStorageOptions(),
+                  blobReadChannelContext.getRetryAlgorithmManager().idempotent()),
+              ApiFutures.transform(
+                  resultFuture, StorageObject::getContentEncoding, MoreExecutors.directExecutor()));
+        } else {
+          return new ApiaryUnbufferedReadableByteChannel(
              request,
              blobReadChannelContext.getApiaryClient(),
              resultFuture,
              blobReadChannelContext.getStorageOptions(),
              blobReadChannelContext.getRetryAlgorithmManager().idempotent());
+        }
+      };
     }
 
     public static final class BufferedReadableByteChannelSessionBuilder {
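The `boolean autoGzipDecompression = this.autoGzipDecompression;` line implements the comment about capturing non-final values: the lambda closes over a snapshot, so mutating the builder after bindFunction() has run cannot change an already-built session. A toy sketch of the idiom (all names here are illustrative, not part of the library):

import java.util.function.Supplier;

final class SnapshotCaptureExample {
  private boolean flag = true;

  Supplier<Boolean> bind() {
    // Snapshot the mutable field so the returned lambda is unaffected by later changes.
    boolean captured = this.flag;
    return () -> captured;
  }

  public static void main(String[] args) {
    SnapshotCaptureExample builder = new SnapshotCaptureExample();
    Supplier<Boolean> bound = builder.bind();
    builder.flag = false;            // mutate after binding
    System.out.println(bound.get()); // prints true: the snapshot won
  }
}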

google-cloud-storage/src/test/java/com/google/cloud/storage/it/ITBlobReadChannelTest.java

Lines changed: 1 addition & 1 deletion
@@ -427,7 +427,7 @@ public void limitAfterReadWorks() throws IOException {
     String xxd1 = xxd(bytes1);
     assertThat(xxd1).isEqualTo(xxdExpected1);
 
-    // seek forward to a new offset
+    // change the limit
     reader.limit(10);
 
     // read again
