From 1d2f31e2e147d945f6be72fd1b9215706ded9f67 Mon Sep 17 00:00:00 2001 From: Mark Wagner Date: Thu, 30 Mar 2017 14:30:23 -0700 Subject: [PATCH] Expire schemas from Avro cache based on age instead of recency Guava's cache performs poorly under high load when tracking recency. The details are in google/guava#2408. The short version is that each access is tracked and aggregated. If there's constant read access by more threads than there are cores (as happens for the AvroSerde in a Presto worker), the whole thing gets backed up and ultimately leads to a lengthy GC pause and query timeouts. Expiring based on age instead of recency avoids the issue. --- .../org/apache/hadoop/hive/serde2/avro/InstanceCache.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java b/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java index e096e23..c3b2f60 100644 --- a/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java +++ b/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java @@ -17,15 +17,16 @@ import com.google.common.cache.CacheBuilder; import java.util.Set; +import java.util.concurrent.TimeUnit; /** - * This is a thread-safe, size-bounded fork of the Hive version. + * This is a thread-safe, time-bounded fork of the Hive version. * It also includes the correctness fix from HIVE-11288. */ public abstract class InstanceCache { private final Cache cache = CacheBuilder.newBuilder() - .maximumSize(100_000) + .expireAfterWrite(1, TimeUnit.MINUTES) .build(); protected InstanceCache() {}