Using Apache Avro Reflect

fodon picture fodon · Aug 8, 2012 · Viewed 21.1k times · Source

Avro serialization is popular with Hadoop users, but examples are hard to find.

Can anyone help me with this sample code? I'm mostly interested in using the Reflect API to read from and write to files, and in using the Union and Nullable annotations.

/**
 * Sketch from the question: two nested data classes plus a main method that is
 * meant to write two packets to a file and then read them back.
 *
 * NOTE(review): this is not compilable as shown. The names file, write(...) and
 * read(...) used in main are not declared anywhere in this snippet, and no
 * imports are shown for IOException or Nullable.
 */
public class Reflect {

    /**
     * A packet holding an integer cost and a time stamp.
     *
     * NOTE(review): declared as a non-static inner class, so creating it from
     * the static main below would need an enclosing Reflect instance.
     */
    public class Packet {
        int cost;
        // Annotated @Nullable; main passes null here for its second packet.
        @Nullable TimeStamp stamp;
        public Packet(int cost, TimeStamp stamp){
            this.cost = cost;
            this.stamp = stamp;
        }
    }

    /**
     * An hour/second pair; both fields default to 0 and are set by the constructor.
     *
     * NOTE(review): also a non-static inner class (see the note on Packet).
     */
    public class TimeStamp {
        int hour = 0;
        int second = 0;
        public TimeStamp(int hour, int second){
            this.hour = hour;
            this.second = second;
        }
    }

    /**
     * Intended flow: write one packet with a time stamp and one without,
     * close the file, then read both packets back.
     */
    public static void main(String[] args) throws IOException {
        TimeStamp stamp;
        Packet packet;

        // first packet: carries a time stamp
        stamp = new TimeStamp(12, 34);
        packet = new Packet(9, stamp);
        write(file, packet);

        // second packet: null time stamp
        packet = new Packet(8, null);
        write(file, packet);
        file.close();

        // open file to read.
        // NOTE(review): nothing here actually reopens the file; the two reads
        // below each overwrite the same local variable.
        packet = read(file);
        packet = read(file);
    }
}

Answer

Doug Cutting picture Doug Cutting · Aug 8, 2012

Here's a version of the above program that works.

It also compresses the file, using the deflate codec at level 9.

import java.io.File;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.Nullable;

/**
 * Writes a deflate-compressed Avro data file of {@link Packet} records using the
 * reflect API, then reads the file back and prints every record.
 *
 * <p>Usage: {@code Reflect <file>}
 */
public class Reflect {

  /** A record with an integer cost and an optional (nullable) time stamp. */
  public static class Packet {
    int cost;
    @Nullable TimeStamp stamp;                // null for odd-numbered packets in main
    public Packet() {}                        // required to read
    public Packet(int cost, TimeStamp stamp){
      this.cost = cost;
      this.stamp = stamp;
    }
  }

  /** An hour/second pair; both fields default to 0. */
  public static class TimeStamp {
    int hour = 0;
    int second = 0;
    public TimeStamp() {}                     // required to read
    public TimeStamp(int hour, int second){
      this.hour = hour;
      this.second = second;
    }
  }

  /**
   * Writes 100 packets to the file named by the first argument, then reads the
   * file back and prints each packet.
   *
   * @param args one argument: a file name (extra arguments are ignored)
   * @throws IllegalArgumentException if no file name is given
   * @throws Exception if writing or reading the file fails
   */
  public static void main(String[] args) throws Exception {
    // one argument: a file name
    if (args.length < 1) {
      throw new IllegalArgumentException("usage: Reflect <file>");
    }
    File file = new File(args[0]);

    // get the reflected schema for packets
    Schema schema = ReflectData.get().getSchema(Packet.class);

    // create a file of packets
    DatumWriter<Packet> writer = new ReflectDatumWriter<Packet>(Packet.class);
    DataFileWriter<Packet> out = new DataFileWriter<Packet>(writer)
      .setCodec(CodecFactory.deflateCodec(9))
      .create(schema, file);
    try {
      // write 100 packets to the file, odds with null timestamp
      for (int i = 0; i < 100; i++) {
        out.append(new Packet(i, (i%2==0) ? new TimeStamp(12, i) : null));
      }
    } finally {
      // close the output file even if an append fails
      out.close();
    }

    // open a file of packets
    DatumReader<Packet> reader = new ReflectDatumReader<Packet>(Packet.class);
    DataFileReader<Packet> in = new DataFileReader<Packet>(file, reader);
    try {
      // read every packet from the file & print it as JSON
      for (Packet packet : in) {
        System.out.println(ReflectData.get().toString(packet));
      }
    } finally {
      // close the input file even if reading or printing fails
      in.close();
    }
  }

}