001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.IOException; 021import java.text.ParseException; 022import java.text.SimpleDateFormat; 023import java.util.ArrayList; 024import java.util.Collections; 025import java.util.HashSet; 026import java.util.List; 027import java.util.Map; 028import java.util.Set; 029import java.util.TreeMap; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.conf.Configured; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.Cell; 034import org.apache.hadoop.hbase.CellUtil; 035import org.apache.hadoop.hbase.ExtendedCell; 036import org.apache.hadoop.hbase.HBaseConfiguration; 037import org.apache.hadoop.hbase.KeyValue; 038import org.apache.hadoop.hbase.KeyValueUtil; 039import org.apache.hadoop.hbase.PrivateCellUtil; 040import org.apache.hadoop.hbase.TableName; 041import org.apache.hadoop.hbase.client.Connection; 042import org.apache.hadoop.hbase.client.ConnectionFactory; 043import org.apache.hadoop.hbase.client.Delete; 044import org.apache.hadoop.hbase.client.Mutation; 045import org.apache.hadoop.hbase.client.Put; 046import org.apache.hadoop.hbase.client.RegionLocator; 047import org.apache.hadoop.hbase.client.Table; 048import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 049import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.TableInfo; 050import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec; 051import org.apache.hadoop.hbase.snapshot.SnapshotRegionLocator; 052import org.apache.hadoop.hbase.util.Bytes; 053import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 054import org.apache.hadoop.hbase.util.MapReduceExtendedCell; 055import org.apache.hadoop.hbase.wal.WALEdit; 056import org.apache.hadoop.hbase.wal.WALKey; 057import org.apache.hadoop.io.WritableComparable; 058import org.apache.hadoop.mapreduce.Job; 059import org.apache.hadoop.mapreduce.Mapper; 060import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 061import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 062import org.apache.hadoop.util.Tool; 063import org.apache.hadoop.util.ToolRunner; 064import org.apache.yetus.audience.InterfaceAudience; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068/** 069 * A tool to replay WAL files as a M/R job. The WAL can be replayed for a set of tables or all 070 * tables, and a time range can be provided (in milliseconds). The WAL is filtered to the passed set 071 * of tables and the output can optionally be mapped to another set of tables. WAL replay can also 072 * generate HFiles for later bulk importing, in that case the WAL is replayed for a single table 073 * only. 074 */ 075@InterfaceAudience.Public 076public class WALPlayer extends Configured implements Tool { 077 private static final Logger LOG = LoggerFactory.getLogger(WALPlayer.class); 078 final static String NAME = "WALPlayer"; 079 public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output"; 080 public final static String TABLES_KEY = "wal.input.tables"; 081 public final static String TABLE_MAP_KEY = "wal.input.tablesmap"; 082 public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator"; 083 public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files"; 084 public final static String MULTI_TABLES_SUPPORT = "wal.multi.tables.support"; 085 086 protected static final String tableSeparator = ";"; 087 088 private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name"; 089 090 public WALPlayer() { 091 } 092 093 protected WALPlayer(final Configuration c) { 094 super(c); 095 } 096 097 /** 098 * A mapper that just writes out KeyValues. This one can be used together with 099 * {@link KeyValueSortReducer} 100 * @deprecated Use {@link WALCellMapper}. Will be removed from 3.0 onwards 101 */ 102 @Deprecated 103 static class WALKeyValueMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> { 104 private Set<String> tableSet = new HashSet<String>(); 105 private boolean multiTableSupport = false; 106 107 @Override 108 public void map(WALKey key, WALEdit value, Context context) throws IOException { 109 try { 110 TableName table = key.getTableName(); 111 if (tableSet.contains(table.getNameAsString())) { 112 for (Cell cell : value.getCells()) { 113 if (WALEdit.isMetaEditFamily(cell)) { 114 continue; 115 } 116 KeyValue keyValue = KeyValueUtil.ensureKeyValue(cell); 117 byte[] outKey = multiTableSupport 118 ? Bytes.add(table.getName(), Bytes.toBytes(tableSeparator), 119 CellUtil.cloneRow(keyValue)) 120 : CellUtil.cloneRow(keyValue); 121 context.write(new ImmutableBytesWritable(outKey), keyValue); 122 } 123 } 124 } catch (InterruptedException e) { 125 e.printStackTrace(); 126 } 127 } 128 129 @Override 130 public void setup(Context context) throws IOException { 131 Configuration conf = context.getConfiguration(); 132 String[] tables = conf.getStrings(TABLES_KEY); 133 this.multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 134 for (String table : tables) { 135 tableSet.add(table); 136 } 137 } 138 } 139 140 /** 141 * A mapper that just writes out Cells. This one can be used together with {@link CellSortReducer} 142 */ 143 static class WALCellMapper extends Mapper<WALKey, WALEdit, WritableComparable<?>, Cell> { 144 private Set<String> tableSet = new HashSet<>(); 145 private boolean multiTableSupport = false; 146 private boolean diskBasedSortingEnabled = false; 147 148 @Override 149 public void map(WALKey key, WALEdit value, Context context) throws IOException { 150 try { 151 TableName table = key.getTableName(); 152 if (tableSet.contains(table.getNameAsString())) { 153 for (Cell cell : value.getCells()) { 154 if (WALEdit.isMetaEditFamily(cell)) { 155 continue; 156 } 157 158 // Set sequenceId from WALKey, since it is not included by WALCellCodec. The sequenceId 159 // on WALKey is the same value that was on the cells in the WALEdit. This enables 160 // CellSortReducer to use sequenceId to disambiguate duplicate cell timestamps. 161 // See HBASE-27649 162 PrivateCellUtil.setSequenceId(cell, key.getSequenceId()); 163 164 byte[] outKey = multiTableSupport 165 ? Bytes.add(table.getName(), Bytes.toBytes(tableSeparator), CellUtil.cloneRow(cell)) 166 : CellUtil.cloneRow(cell); 167 ExtendedCell extendedCell = (ExtendedCell) cell; 168 context.write(wrapKey(outKey, extendedCell), new MapReduceExtendedCell(extendedCell)); 169 } 170 } 171 } catch (InterruptedException e) { 172 e.printStackTrace(); 173 } 174 } 175 176 @Override 177 public void setup(Context context) throws IOException { 178 Configuration conf = context.getConfiguration(); 179 String[] tables = conf.getStrings(TABLES_KEY); 180 this.multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 181 this.diskBasedSortingEnabled = HFileOutputFormat2.diskBasedSortingEnabled(conf); 182 Collections.addAll(tableSet, tables); 183 } 184 185 private WritableComparable<?> wrapKey(byte[] key, ExtendedCell cell) { 186 if (this.diskBasedSortingEnabled) { 187 // Important to build a new cell with the updated key to maintain multi-table support 188 KeyValue kv = new KeyValue(key, 0, key.length, cell.getFamilyArray(), 189 cell.getFamilyOffset(), cell.getFamilyLength(), cell.getQualifierArray(), 190 cell.getQualifierOffset(), cell.getQualifierLength(), cell.getTimestamp(), 191 KeyValue.Type.codeToType(cell.getTypeByte()), null, 0, 0); 192 kv.setSequenceId(cell.getSequenceId()); 193 return new KeyOnlyCellComparable(kv); 194 } else { 195 return new ImmutableBytesWritable(key); 196 } 197 } 198 } 199 200 /** 201 * Enum for map metrics. Keep it out here rather than inside in the Map inner-class so we can find 202 * associated properties. 203 */ 204 protected static enum Counter { 205 /** Number of aggregated writes */ 206 PUTS, 207 /** Number of aggregated deletes */ 208 DELETES, 209 CELLS_READ, 210 CELLS_WRITTEN, 211 WALEDITS 212 } 213 214 /** 215 * A mapper that writes out {@link Mutation} to be directly applied to a running HBase instance. 216 */ 217 protected static class WALMapper 218 extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> { 219 private Map<TableName, TableName> tables = new TreeMap<>(); 220 221 @Override 222 public void map(WALKey key, WALEdit value, Context context) throws IOException { 223 context.getCounter(Counter.WALEDITS).increment(1); 224 try { 225 if (tables.isEmpty() || tables.containsKey(key.getTableName())) { 226 TableName targetTable = 227 tables.isEmpty() ? key.getTableName() : tables.get(key.getTableName()); 228 ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName()); 229 Put put = null; 230 Delete del = null; 231 Cell lastCell = null; 232 for (Cell cell : value.getCells()) { 233 context.getCounter(Counter.CELLS_READ).increment(1); 234 // Filtering WAL meta marker entries. 235 if (WALEdit.isMetaEditFamily(cell)) { 236 continue; 237 } 238 // Allow a subclass filter out this cell. 239 if (filter(context, cell)) { 240 // A WALEdit may contain multiple operations (HBASE-3584) and/or 241 // multiple rows (HBASE-5229). 242 // Aggregate as much as possible into a single Put/Delete 243 // operation before writing to the context. 244 if ( 245 lastCell == null || lastCell.getTypeByte() != cell.getTypeByte() 246 || !CellUtil.matchingRows(lastCell, cell) 247 ) { 248 // row or type changed, write out aggregate KVs. 249 if (put != null) { 250 context.write(tableOut, put); 251 context.getCounter(Counter.PUTS).increment(1); 252 } 253 if (del != null) { 254 context.write(tableOut, del); 255 context.getCounter(Counter.DELETES).increment(1); 256 } 257 if (CellUtil.isDelete(cell)) { 258 del = new Delete(CellUtil.cloneRow(cell)); 259 } else { 260 put = new Put(CellUtil.cloneRow(cell)); 261 } 262 } 263 if (CellUtil.isDelete(cell)) { 264 del.add(cell); 265 } else { 266 put.add(cell); 267 } 268 context.getCounter(Counter.CELLS_WRITTEN).increment(1); 269 } 270 lastCell = cell; 271 } 272 // write residual KVs 273 if (put != null) { 274 context.write(tableOut, put); 275 context.getCounter(Counter.PUTS).increment(1); 276 } 277 if (del != null) { 278 context.getCounter(Counter.DELETES).increment(1); 279 context.write(tableOut, del); 280 } 281 } 282 } catch (InterruptedException e) { 283 e.printStackTrace(); 284 } 285 } 286 287 protected boolean filter(Context context, final Cell cell) { 288 return true; 289 } 290 291 @Override 292 protected void 293 cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context) 294 throws IOException, InterruptedException { 295 super.cleanup(context); 296 } 297 298 @SuppressWarnings("checkstyle:EmptyBlock") 299 @Override 300 public void setup(Context context) throws IOException { 301 String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY); 302 String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY); 303 if (tableMap == null) { 304 tableMap = tablesToUse; 305 } 306 if (tablesToUse == null) { 307 // Then user wants all tables. 308 } else if (tablesToUse.length != tableMap.length) { 309 // this can only happen when WALMapper is used directly by a class other than WALPlayer 310 throw new IOException("Incorrect table mapping specified ."); 311 } 312 int i = 0; 313 if (tablesToUse != null) { 314 for (String table : tablesToUse) { 315 tables.put(TableName.valueOf(table), TableName.valueOf(tableMap[i++])); 316 } 317 } 318 } 319 } 320 321 void setupTime(Configuration conf, String option) throws IOException { 322 String val = conf.get(option); 323 if (null == val) { 324 return; 325 } 326 long ms; 327 try { 328 // first try to parse in user friendly form 329 ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime(); 330 } catch (ParseException pe) { 331 try { 332 // then see if just a number of ms's was specified 333 ms = Long.parseLong(val); 334 } catch (NumberFormatException nfe) { 335 throw new IOException( 336 option + " must be specified either in the form 2001-02-20T16:35:06.99 " 337 + "or as number of milliseconds"); 338 } 339 } 340 conf.setLong(option, ms); 341 } 342 343 /** 344 * Sets up the actual job. 345 * @param args The command line parameters. 346 * @return The newly created job. 347 * @throws IOException When setting up the job fails. 348 */ 349 public Job createSubmittableJob(String[] args) throws IOException { 350 Configuration conf = getConf(); 351 setupTime(conf, WALInputFormat.START_TIME_KEY); 352 setupTime(conf, WALInputFormat.END_TIME_KEY); 353 String inputDirs = args[0]; 354 String[] tables = args.length == 1 ? new String[] {} : args[1].split(","); 355 String[] tableMap; 356 if (args.length > 2) { 357 tableMap = args[2].split(","); 358 if (tableMap.length != tables.length) { 359 throw new IOException("The same number of tables and mapping must be provided."); 360 } 361 } else { 362 // if no mapping is specified, map each table to itself 363 tableMap = tables; 364 } 365 366 boolean multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 367 conf.setStrings(TABLES_KEY, tables); 368 conf.setStrings(TABLE_MAP_KEY, tableMap); 369 conf.set(FileInputFormat.INPUT_DIR, inputDirs); 370 Job job = Job.getInstance(conf, 371 conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime())); 372 job.setJarByClass(WALPlayer.class); 373 374 job.setInputFormatClass(WALInputFormat.class); 375 boolean diskBasedSortingEnabled = HFileOutputFormat2.diskBasedSortingEnabled(conf); 376 if (diskBasedSortingEnabled) { 377 job.setMapOutputKeyClass(KeyOnlyCellComparable.class); 378 job.setSortComparatorClass(KeyOnlyCellComparable.KeyOnlyCellComparator.class); 379 } else { 380 job.setMapOutputKeyClass(ImmutableBytesWritable.class); 381 } 382 383 String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); 384 if (hfileOutPath != null) { 385 LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs); 386 387 if (!multiTableSupport && tables.length != 1) { 388 throw new IOException("Exactly one table must be specified for the bulk export option"); 389 } 390 391 // WALPlayer needs ExtendedCellSerialization so that sequenceId can be propagated when 392 // sorting cells in CellSortReducer 393 job.getConfiguration().setBoolean(HFileOutputFormat2.EXTENDED_CELL_SERIALIZATION_ENABLED_KEY, 394 true); 395 396 // the bulk HFile case 397 List<TableName> tableNames = getTableNameList(tables); 398 399 job.setMapperClass(WALCellMapper.class); 400 if (diskBasedSortingEnabled) { 401 job.setReducerClass(PreSortedCellsReducer.class); 402 } else { 403 job.setReducerClass(CellSortReducer.class); 404 } 405 Path outputDir = new Path(hfileOutPath); 406 FileOutputFormat.setOutputPath(job, outputDir); 407 job.setMapOutputValueClass(MapReduceExtendedCell.class); 408 try (Connection conn = ConnectionFactory.createConnection(conf);) { 409 List<TableInfo> tableInfoList = new ArrayList<>(); 410 for (TableName tableName : tableNames) { 411 Table table = conn.getTable(tableName); 412 RegionLocator regionLocator = getRegionLocator(tableName, conf, conn); 413 tableInfoList.add(new TableInfo(table.getDescriptor(), regionLocator)); 414 } 415 if (multiTableSupport) { 416 MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfoList); 417 } else { 418 TableInfo tableInfo = tableInfoList.get(0); 419 HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.getTableDescriptor(), 420 tableInfo.getRegionLocator()); 421 } 422 } 423 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 424 org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class); 425 } else { 426 // output to live cluster 427 job.setMapperClass(WALMapper.class); 428 job.setOutputFormatClass(MultiTableOutputFormat.class); 429 TableMapReduceUtil.addDependencyJars(job); 430 TableMapReduceUtil.initCredentials(job); 431 // No reducers. 432 job.setNumReduceTasks(0); 433 } 434 String codecCls = WALCellCodec.getWALCellCodecClass(conf).getName(); 435 try { 436 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 437 Class.forName(codecCls)); 438 } catch (Exception e) { 439 throw new IOException("Cannot determine wal codec class " + codecCls, e); 440 } 441 return job; 442 } 443 444 private List<TableName> getTableNameList(String[] tables) { 445 List<TableName> list = new ArrayList<TableName>(); 446 for (String name : tables) { 447 list.add(TableName.valueOf(name)); 448 } 449 return list; 450 } 451 452 /** 453 * Print usage 454 * @param errorMsg Error message. Can be null. 455 */ 456 private void usage(final String errorMsg) { 457 if (errorMsg != null && errorMsg.length() > 0) { 458 System.err.println("ERROR: " + errorMsg); 459 } 460 System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]"); 461 System.err.println(" <WAL inputdir> directory of WALs to replay."); 462 System.err.println(" <tables> comma separated list of tables. If no tables specified,"); 463 System.err.println(" all are imported (even hbase:meta if present)."); 464 System.err.println( 465 " <tableMappings> WAL entries can be mapped to a new set of tables by " + "passing"); 466 System.err 467 .println(" <tableMappings>, a comma separated list of target " + "tables."); 468 System.err 469 .println(" If specified, each table in <tables> must have a " + "mapping."); 470 System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:"); 471 System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output"); 472 System.err.println(" Only one table can be specified, and no mapping allowed!"); 473 System.err.println("To specify a time range, pass:"); 474 System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]"); 475 System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]"); 476 System.err.println(" The start and the end date of timerange (inclusive). The dates can be"); 477 System.err 478 .println(" expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS " + "format."); 479 System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12"); 480 System.err.println("Other options:"); 481 System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName"); 482 System.err.println(" Use the specified mapreduce job name for the wal player"); 483 System.err.println(" -Dwal.input.separator=' '"); 484 System.err.println(" Change WAL filename separator (WAL dir names use default ','.)"); 485 System.err.println("For performance also consider the following options:\n" 486 + " -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.reduce.speculative=false"); 487 } 488 489 /** 490 * Main entry point. 491 * @param args The command line parameters. 492 * @throws Exception When running the job fails. 493 */ 494 public static void main(String[] args) throws Exception { 495 int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args); 496 System.exit(ret); 497 } 498 499 @Override 500 public int run(String[] args) throws Exception { 501 if (args.length < 1) { 502 usage("Wrong number of arguments: " + args.length); 503 System.exit(-1); 504 } 505 Job job = createSubmittableJob(args); 506 return job.waitForCompletion(true) ? 0 : 1; 507 } 508 509 private static RegionLocator getRegionLocator(TableName tableName, Configuration conf, 510 Connection conn) throws IOException { 511 if (SnapshotRegionLocator.shouldUseSnapshotRegionLocator(conf, tableName)) { 512 return SnapshotRegionLocator.create(conf, tableName); 513 } 514 515 return conn.getRegionLocator(tableName); 516 } 517}