diff --git a/warehouse/core/src/main/java/datawave/mr/bulk/BulkInputFormat.java b/warehouse/core/src/main/java/datawave/mr/bulk/BulkInputFormat.java index 5dac002ddb..67fef0102d 100644 --- a/warehouse/core/src/main/java/datawave/mr/bulk/BulkInputFormat.java +++ b/warehouse/core/src/main/java/datawave/mr/bulk/BulkInputFormat.java @@ -6,8 +6,10 @@ import java.net.InetAddress; import java.net.URLDecoder; import java.net.URLEncoder; +import java.text.DateFormat; import java.util.ArrayList; import java.util.Collection; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -52,9 +54,11 @@ import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.accumulo.core.security.TablePermission; import org.apache.accumulo.core.singletons.SingletonReservation; import org.apache.accumulo.core.util.Pair; +import org.apache.accumulo.core.util.format.DateFormatSupplier; import org.apache.accumulo.core.util.format.DefaultFormatter; import org.apache.accumulo.core.util.threads.Threads; import org.apache.commons.codec.binary.Base64; @@ -94,6 +98,9 @@ public class BulkInputFormat extends InputFormat { protected static final Logger log = Logger.getLogger(BulkInputFormat.class); + private static final ThreadLocal tmpDate = ThreadLocal.withInitial(Date::new); + private static final ThreadLocal formatter = DateFormatSupplier.createDefaultFormatSupplier(); + protected static final String PREFIX = BulkInputFormat.class.getSimpleName(); protected static final String INPUT_INFO_HAS_BEEN_SET = PREFIX + ".configured"; protected static final String INSTANCE_HAS_BEEN_SET = PREFIX + ".instanceConfigured"; @@ -1322,6 +1329,22 @@ public String toString() { public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) { return new RecordReaderBase() { + + // helper function for formatting. Rewritten from DefaultFormatter.appendBytes() + private StringBuilder appendBytes(StringBuilder sb, byte[] ba, int offset, int len) { + for (int i = 0; i < len; i++) { + int c = 0xff & ba[offset + i]; + if (c == '\\') { + sb.append("\\\\"); + } else if (c >= 32 && c <= 126) { + sb.append((char) c); + } else { + sb.append("\\x").append(String.format("%02X", c)); + } + } + return sb; + } + @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (scannerIterator.hasNext()) { @@ -1329,8 +1352,37 @@ public boolean nextKeyValue() throws IOException, InterruptedException { Entry entry = scannerIterator.next(); currentK = currentKey = entry.getKey(); currentV = currentValue = entry.getValue(); - if (log.isTraceEnabled()) - log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true)); + if (log.isTraceEnabled()) { + + // rewritten from DefaultFormatter.formatEntry() + StringBuilder sb = new StringBuilder(); + Text buffer = new Text(); + + // append row0 + appendBytes(sb, currentK.getRow(buffer).getBytes(), 0, currentK.getRow(buffer).getLength()).append(" "); + + // append column family + appendBytes(sb, currentK.getColumnFamily(buffer).getBytes(), 0, currentK.getColumnFamily(buffer).getLength()).append(":"); + + // append column qualifier + appendBytes(sb, currentK.getColumnQualifier(buffer).getBytes(), 0, currentK.getColumnQualifier(buffer).getLength()).append(" "); + + // append visibility expression + sb.append(new ColumnVisibility(currentK.getColumnVisibility(buffer))); + + // append timestamp + tmpDate.get().setTime(entry.getKey().getTimestamp()); + sb.append(" ").append(formatter.get().format(tmpDate.get())); + + // append value + if (currentV != null && currentV.getSize() > 0) { + sb.append("\t"); + appendBytes(sb, currentV.get(), 0, currentV.getSize()); + } + + log.trace("Processing key/value pair: " + sb); + } + return true; } else if (numKeysRead < 0) { numKeysRead = 0; @@ -1339,5 +1391,4 @@ public boolean nextKeyValue() throws IOException, InterruptedException { } }; } - }