nl.basjes.parse.core.Parser Java Examples

The following examples show how to use nl.basjes.parse.core.Parser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MultiLineHttpdLogParserTest.java    From logparser with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a single parser from two log formats (each followed by an empty line)
 * and checks that it keeps parsing correctly while the input switches back and
 * forth between lines of the first and the second format.
 */
@Test
public void fullTest1() throws Exception {

    String logFormat = LOG_FORMAT_1 + '\n' + '\n'
                     + LOG_FORMAT_2 + '\n' + '\n';

    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, logFormat);

    // Three rounds of: the first line twice, then the second line twice.
    for (int round = 0; round < 3; round++) {
        validateLine1(parser);
        validateLine1(parser);
        validateLine2(parser);
        validateLine2(parser);
    }
}
 
Example #2
Source File: Main.java    From logparser with Apache License 2.0 6 votes vote down vote up
private void printAllPossibles(String logformat) throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {
    // A parser for a dummy class without ANY @Field annotations is enough to
    // find out which values CAN be obtained from this logformat.
    Parser<Object> probe = new HttpdLoglineParser<>(Object.class, logformat);
    List<String> allPaths = probe.getPossiblePaths();

    // 'getCasts' requires the actual parser to be constructed, and getPossiblePaths
    // alone does not do that. This instance is never used for real parsing, so any
    // method with the right signature will do as the target for all possible paths.
    probe.addParseTarget(String.class.getMethod("indexOf", String.class), allPaths);

    LOG.info("==================================");
    LOG.info("Possible output:");
    for (String candidate : allPaths) {
        LOG.info("{}     {}", candidate, probe.getCasts(candidate));
    }
    LOG.info("==================================");
}
 
Example #3
Source File: TestCase.java    From logparser with Apache License 2.0 6 votes vote down vote up
public static Parser<TestRecord> createTestParser() throws NoSuchMethodException {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, getLogFormat());

    parser.addDissector(new ScreenResolutionDissector());

    // Type remappings, each as { field path, new type }.
    final String[][] remappings = {
        {"request.firstline.uri.query.g", "HTTP.URI"},
        {"request.firstline.uri.query.r", "HTTP.URI"},
        {"request.firstline.uri.query.s", "SCREENRESOLUTION"},
    };
    for (String[] remapping : remappings) {
        parser.addTypeRemapping(remapping[0], remapping[1]);
    }

    // Parse targets, each as { setter name on TestRecord, requested field }.
    final String[][] targets = {
        {"setConnectionClientHost", "IP:connection.client.host"},
        {"setRequestReceiveTime",   "TIME.STAMP:request.receive.time"},
        {"setReferrer",             "STRING:request.firstline.uri.query.g.query.promo"},
        {"setScreenResolution",     "STRING:request.firstline.uri.query.s"},
        {"setScreenWidth",          "SCREENWIDTH:request.firstline.uri.query.s.width"},
        {"setScreenHeight",         "SCREENHEIGHT:request.firstline.uri.query.s.height"},
        {"setGoogleQuery",          "STRING:request.firstline.uri.query.r.query.blabla"},
        {"setBui",                  "HTTP.COOKIE:request.cookies.bui"},
        {"setUseragent",            "HTTP.USERAGENT:request.user-agent"},
    };
    for (String[] target : targets) {
        parser.addParseTarget(target[0], target[1]);
    }

    return parser;
}
 
Example #4
Source File: ApacheHttpdLogfileRecordReader.java    From logparser with Apache License 2.0 6 votes vote down vote up
/**
 * Switches the reader into "output all possible fields" mode when the field list
 * consists of exactly one entry that equals {@code FIELDS} (compared ignoring case
 * and surrounding whitespace).
 *
 * In that mode this records all possible paths of the current parser, remembers the
 * requested field name, and collects all casts from a freshly instantiated parser that
 * targets every possible path. Nothing happens when the field list is null or empty.
 *
 * @throws NoSuchMethodException when the {@code set(String, String)} method cannot be
 *                               found on ParsedRecord
 */
private void setupFields() throws MissingDissectorsException, InvalidDissectorException, NoSuchMethodException, IOException {
    if (fieldList == null || fieldList.isEmpty()) {
        return; // Nothing to do here
    }
    String firstField = fieldList.get(0);
    // Lower-case with Locale.ROOT: the no-arg toLowerCase() follows the JVM default
    // locale, and e.g. under a Turkish locale 'I' becomes a dotless 'ı', so an
    // upper-case spelling of the marker would not match.
    // NOTE(review): assumes FIELDS itself is an all lower-case constant - confirm.
    if (fieldList.size() == 1 &&
        firstField.toLowerCase(java.util.Locale.ROOT).trim().equals(FIELDS)) {
        outputAllPossibleFields = true;
        allPossiblePaths = getParser().getPossiblePaths();
        allPossiblePathsFieldName = firstField;
        // A separate parser instance that targets every possible path is only
        // used here to obtain the casts for all of those paths.
        Parser<ParsedRecord> newParser = instantiateParser(logformat)
            .addParseTarget(ParsedRecord.class.getMethod("set", String.class, String.class), allPossiblePaths)
            .addTypeRemappings(typeRemappings);
        allCasts = newParser.getAllCasts();
    }
}
 
Example #5
Source File: ApacheHttpdLogParserTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Test of mod_reqtimeout 408 status code.
 * Assume mod_reqtimeout is enabled and absolutely no data is entered by a client
 * after making the connection. The result is an HTTP 408 status code and a logline that has proven to
 * result in several fields failing to be parsed because they differ from the specifications.
 *
 * This test contains no assertions: it passes when both the regular (200) line and the
 * 408 line are parsed without an exception being thrown.
 */
@Test
public void test408ModReqTimeout() throws Exception {

    // The log format under test: a long list of format directives, each wrapped in double quotes.
    final String logformat =
        "\"%%\" \"%a\" \"%{c}a\" \"%A\" \"%B\" \"%b\" \"%D\" \"%f\" \"%h\" \"%H\" \"%k\" " +
        "\"%l\" \"%L\" \"%m\" \"%p\" \"%{canonical}p\" \"%{local}p\" \"%{remote}p\" \"%P\" \"%{pid}P\" \"%{tid}P\"" +
        " \"%{hextid}P\" \"%q\" \"%r\" \"%R\" \"%s\" \"%>s\" \"%t\" \"%{msec}t\" \"%{begin:msec}t\" \"%{end:msec}t" +
        "\" \"%{usec}t\" \"%{begin:usec}t\" \"%{end:usec}t\" \"%{msec_frac}t\" \"%{begin:msec_frac}t\" \"%{end:mse" +
        "c_frac}t\" \"%{usec_frac}t\" \"%{begin:usec_frac}t\" \"%{end:usec_frac}t\" \"%T\" \"%u\" \"%U\" \"%v\" \"" +
        "%V\" \"%X\" \"%I\" \"%O\" \"%{cookie}i\" \"%{set-cookie}o\" \"%{user-agent}i\" \"%{referer}i\"";

    // A logline for a normal request ("GET / HTTP/1.1" with status 200) in the format above.
    String line200 = "\"%\" \"127.0.0.1\" \"127.0.0.1\" \"127.0.0.1\" \"3186\" \"3186\" \"1302\" \"/var/www/html/index.html\" " +
        "\"127.0.0.1\" \"HTTP/1.1\" \"0\" \"-\" \"-\" \"GET\" \"80\" \"80\" \"80\" \"50142\" \"10344\" \"10344\" " +
        "\"139854162249472\" \"139854162249472\" \"\" \"GET / HTTP/1.1\" \"-\" \"200\" \"200\" " +
        "\"[09/Aug/2016:22:57:59 +0200]\" \"1470776279833\" \"1470776279833\" \"1470776279835\" \"1470776279833934\" " +
        "\"1470776279833934\" \"1470776279835236\" \"833\" \"833\" \"835\" \"833934\" \"833934\" \"835236\" \"0\" " +
        "\"-\" \"/index.html\" \"committer.lan.basjes.nl\" \"localhost\" \"+\" \"490\" \"3525\" \"-\" \"-\" " +
        "\"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\" \"-\"";

    // The 408 logline: several values (e.g. the request line) are just "-" here.
    String line408 = "\"%\" \"127.0.0.1\" \"127.0.0.1\" \"127.0.0.1\" \"0\" \"-\" \"34\" \"-\" " +
        "\"127.0.0.1\" \"HTTP/1.0\" \"0\" \"-\" \"-\" \"-\" \"80\" \"80\" \"80\" \"50150\" \"10344\" \"10344\" " +
        "\"139854067267328\" \"139854067267328\" \"\" \"-\" \"-\" \"408\" \"408\" " +
        "\"[09/Aug/2016:22:59:14 +0200]\" \"1470776354625\" \"1470776354625\" \"1470776354625\" \"1470776354625377\" " +
        "\"1470776354625377\" \"1470776354625411\" \"625\" \"625\" \"625\" \"625377\" \"625377\" \"625411\" \"0\" " +
        "\"-\" \"-\" \"committer.lan.basjes.nl\" \"committer.lan.basjes.nl\" \"-\" \"0\" \"0\" \"-\" \"-\" \"-\" \"-\"";

    // Request a single field (a query string parameter of the request URI) via the record's put(String, String).
    Parser<EmptyTestRecord> parser =
        new HttpdLoglineParser<>(EmptyTestRecord.class, logformat)
        .addParseTarget(EmptyTestRecord.class.getMethod("put", String.class, String.class),
                        "STRING:request.firstline.uri.query.foo");
    // Both lines must parse without throwing; the resulting records are not inspected.
    parser.parse(new EmptyTestRecord(), line200);
    parser.parse(new EmptyTestRecord(), line408);
}
 
Example #6
Source File: ApacheHttpdLogParserTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Asks a parser that was given the log format "Empty" for its possible paths
 * and expects to get nothing back (either null or an empty list).
 */
@Test
public void testGetPossiblePathsWithUnusableLogFormat() {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, "Empty");

    List<String> found = parser.getPossiblePaths(5);
    boolean nothingFound = (found == null) || found.isEmpty();
    assertTrue("The output should be empty!", nothingFound);
}
 
Example #7
Source File: CookiesTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Prints every possible path of a parser built for EmptyTestRecord, one per line.
 */
@Test
public void testEmptyRecordPossibles() {
    Parser<EmptyTestRecord> parser = new HttpdLoglineParser<>(EmptyTestRecord.class, LOG_FORMAT);

    for (String path : parser.getPossiblePaths()) {
        System.out.println(path);
    }
}
 
Example #8
Source File: CookiesTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Prints every possible path of a parser built for TestRecord, one per line.
 */
@Test
public void testRecordPossibles() {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, LOG_FORMAT);

    for (String path : parser.getPossiblePaths()) {
        System.out.println(path);
    }
}
 
Example #9
Source File: JsonLogFormatTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds all LOGLINES to a verbose DissectorTester and expects a fixed set of
 * fields to have a value, and every path the parser reports to be possible.
 */
@Test
public void testBasicParsing() {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, LOGFORMAT);

    DissectorTester tester = DissectorTester.create().withParser(parser).verbose();

    for (String logline : LOGLINES) {
        tester.withInput(logline);
    }

    // Fields for which a value must be present, registered in this order.
    final String[] mustBePresent = {
        "TIME.LOCALIZEDSTRING:request.receive.time",
        "STRING:connection.server.name",
        "NUMBER:connection.client.logname",
        "STRING:connection.client.user",
        "HTTP.HEADER:request.header.x-forwarded-for",
        "HTTP.URI:request.referer",
        "HTTP.USERAGENT:request.user-agent",
        "HTTP.HEADER:request.header.host",
        "HTTP.FIRSTLINE:request.firstline",
        "HTTP.METHOD:request.firstline.method",
        "HTTP.URI:request.firstline.uri",
        "HTTP.PROTOCOL:request.firstline.protocol",
        "HTTP.PROTOCOL.VERSION:request.firstline.protocol.version",
        "STRING:request.status.last",
        "BYTES:response.body.bytes",
        "MICROSECONDS:response.server.processing.time",
        "HTTP.QUERYSTRING:request.firstline.uri.query",
        "HTTP.PATH:request.firstline.uri.path",
        "HTTP.REF:request.firstline.uri.ref",
    };
    for (String field : mustBePresent) {
        tester.expectValuePresent(field);
    }

    for (String possiblePath : parser.getPossiblePaths()) {
        tester.expectPossible(possiblePath);
    }

    tester.checkExpectations();
}
 
Example #10
Source File: MultiLineHttpdLogParserTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses LINE_1 into a fresh TestRecord and checks the values that were extracted.
 *
 * @param parser the parser to run LINE_1 through
 */
private void validateLine1(Parser<TestRecord> parser) throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    TestRecord parsed = new TestRecord();
    parser.parse(parsed, LINE_1);
    Map<String, String> values = parsed.getResults();

    assertEquals("127.0.0.1",
                 values.get("IP:connection.client.host"));
    assertEquals("31/Dec/2012:23:49:41 +0100",
                 values.get("TIME.STAMP:request.receive.time"));
    assertEquals("/foo",
                 values.get("HTTP.URI:request.firstline.uri"));
    assertEquals("200",
                 values.get("STRING:request.status.last"));
    assertEquals("1213",
                 values.get("BYTESCLF:response.body.bytes"));
    assertEquals("http://localhost/index.php?mies=wim",
                 values.get("HTTP.URI:request.referer"));
    // No user agent is expected for this line.
    assertEquals(null,
                 values.get("HTTP.USERAGENT:request.user-agent"));
}
 
Example #11
Source File: MultiLineHttpdLogParserTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses LINE_2 into a fresh TestRecord and checks the values that were extracted.
 *
 * @param parser the parser to run LINE_2 through
 */
private void validateLine2(Parser<TestRecord> parser) throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    TestRecord parsed = new TestRecord();
    parser.parse(parsed, LINE_2);
    Map<String, String> values = parsed.getResults();

    assertEquals("127.0.0.2",
                 values.get("IP:connection.client.host"));
    assertEquals("31/Dec/2012:23:49:42 +0100",
                 values.get("TIME.STAMP:request.receive.time"));
    assertEquals("/foo",
                 values.get("HTTP.URI:request.firstline.uri"));
    assertEquals("404",
                 values.get("STRING:request.status.last"));
    // Neither a body size nor a referer is expected for this line.
    assertEquals(null,
                 values.get("BYTESCLF:response.body.bytes"));
    assertEquals(null,
                 values.get("HTTP.URI:request.referer"));
    assertEquals("Mozilla/5.0 (X11; Linux i686 on x86_64; rv:11.0) Gecko/20100101 Firefox/11.0",
                 values.get("HTTP.USERAGENT:request.user-agent"));
}
 
Example #12
Source File: BasicOverallTest.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Registers MyRecord.setValue(String, String) for every possible path and then
 * parses each of the LOG_LINES into one reused record, printing it after every line.
 */
@Test
public void testBasicParsing() throws Exception {
    Parser<MyRecord> parser = new HttpdLoglineParser<>(MyRecord.class, LOG_FORMAT);
    MyRecord reusedRecord = new MyRecord();

    List<String> allPaths = parser.getPossiblePaths();
    parser.addParseTarget(MyRecord.class.getMethod("setValue", String.class, String.class), allPaths);

    for (String line : LOG_LINES) {
        // The same record instance is reused, so wipe the previous values first.
        reusedRecord.clear();
        parser.parse(reusedRecord, line);
        System.out.println(reusedRecord.toString());
    }
}
 
Example #13
Source File: TestFieldSettersNotNull.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NullValuesDissector registered and expects that none of
 * the listed fields ended up with a Double value in the record.
 */
@Test
public void testDouble() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordDouble> parser = new Parser<>(TestRecordDouble.class)
        .setRootType("INPUT")
        .addDissector(new NullValuesDissector());

    TestRecordDouble record = parser.parse("Doesn't matter");

    record.noDouble("ANY:any")
          .noDouble("FLOAT:float")
          .noDouble("DOUBLE:double");
}
 
Example #14
Source File: TestFieldSettersNotNull.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NullValuesDissector registered and expects that none of
 * the listed fields ended up with a Long value in the record.
 */
@Test
public void testLong() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordLong> parser = new Parser<>(TestRecordLong.class)
        .setRootType("INPUT")
        .addDissector(new NullValuesDissector());

    TestRecordLong record = parser.parse("Doesn't matter");

    record.noLong("ANY:any")
          .noLong("INT:int")
          .noLong("LONG:long");
}
 
Example #15
Source File: ApacheHttpdLogfileRecordReader.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a parser for the configured logformat and registers parse targets on
 * ParsedRecord for every entry in the field list.
 *
 * A field that ends with ".*" is bound to {@code setMultiValueString(String, String)};
 * every other field is bound to the {@code set} method in its String, Long and
 * Double variants.
 *
 * @return the configured parser, or {@code null} when the field list or the
 *         logformat has not been set
 * @throws IOException when a setter cannot be looked up on ParsedRecord; the
 *                     underlying exception is attached as the cause
 */
private Parser<ParsedRecord> createParser() throws IOException {
    if (fieldList == null || logformat == null) {
        return null;
    }

    Parser<ParsedRecord> newParser;
    try {
        newParser = instantiateParser(logformat);

        for (String field: fieldList) {
            if (field.endsWith(".*")) {
                newParser.addParseTarget(ParsedRecord.class.getMethod("setMultiValueString",
                        String.class, String.class), field);
            } else {
                newParser.addParseTarget(ParsedRecord.class.getMethod("set",
                        String.class, String.class), field);
                newParser.addParseTarget(ParsedRecord.class.getMethod("set",
                        String.class, Long.class), field);
                newParser.addParseTarget(ParsedRecord.class.getMethod("set",
                        String.class, Double.class), field);
            }
        }

    } catch (NoSuchMethodException
            |SecurityException e) {
        // Same message as before, but keep the original exception as the cause
        // so its stack trace is not lost.
        throw new IOException(e.toString(), e);
    }
    return newParser;
}
 
Example #16
Source File: TestFieldSettersNotEmpty.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only an EmptyValuesDissector registered and expects that none of
 * the listed fields ended up with a String value in the record.
 */
@Test
public void testString() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordString> parser = new Parser<>(TestRecordString.class)
        .setRootType("INPUT")
        .addDissector(new EmptyValuesDissector());

    TestRecordString record = parser.parse("Doesn't matter");

    record.noString("ANY:any")
          .noString("STRING:string")
          .noString("INT:int")
          .noString("LONG:long")
          .noString("FLOAT:float")
          .noString("DOUBLE:double");
}
 
Example #17
Source File: TestFieldSettersNotEmpty.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only an EmptyValuesDissector registered and expects that none of
 * the listed fields ended up with a Long value in the record.
 */
@Test
public void testLong() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordLong> parser = new Parser<>(TestRecordLong.class)
        .setRootType("INPUT")
        .addDissector(new EmptyValuesDissector());

    TestRecordLong record = parser.parse("Doesn't matter");

    record.noLong("ANY:any")
          .noLong("INT:int")
          .noLong("LONG:long");
}
 
Example #18
Source File: TestFieldSettersNotEmpty.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only an EmptyValuesDissector registered and expects that none of
 * the listed fields ended up with a Double value in the record.
 */
@Test
public void testDouble() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordDouble> parser = new Parser<>(TestRecordDouble.class)
        .setRootType("INPUT")
        .addDissector(new EmptyValuesDissector());

    TestRecordDouble record = parser.parse("Doesn't matter");

    record.noDouble("ANY:any")
          .noDouble("FLOAT:float")
          .noDouble("DOUBLE:double");
}
 
Example #19
Source File: TestFieldSettersAlwaysCombined.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NormalValuesDissector registered and expects each listed
 * field to have been delivered to the record as the given String.
 */
@Test
public void testString() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordString> parser = new Parser<>(TestRecordString.class)
        .setRootType("INPUT")
        .addDissector(new NormalValuesDissector());

    TestRecordString record = parser.parse("Doesn't matter");

    record.expectString("ANY:any", "42")
          .expectString("STRING:string", "FortyTwo")
          .expectString("INT:int", "42")
          .expectString("LONG:long", "42")
          .expectString("FLOAT:float", "42.0")
          .expectString("DOUBLE:double", "42.0");
}
 
Example #20
Source File: TestFieldSettersAlwaysCombined.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NormalValuesDissector registered and expects each listed
 * field to have been delivered to the record as the Long 42.
 */
@Test
public void testLong() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordLong> parser = new Parser<>(TestRecordLong.class)
        .setRootType("INPUT")
        .addDissector(new NormalValuesDissector());

    TestRecordLong record = parser.parse("Doesn't matter");

    record.expectLong("ANY:any", 42L)
          .expectLong("INT:int", 42L)
          .expectLong("LONG:long", 42L);
}
 
Example #21
Source File: TestFieldSettersAlwaysCombined.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NormalValuesDissector registered and expects each listed
 * field to have been delivered to the record as the Double 42.
 */
@Test
public void testDouble() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordDouble> parser = new Parser<>(TestRecordDouble.class)
        .setRootType("INPUT")
        .addDissector(new NormalValuesDissector());

    TestRecordDouble record = parser.parse("Doesn't matter");

    record.expectDouble("ANY:any", 42D)
          .expectDouble("FLOAT:float", 42D)
          .expectDouble("DOUBLE:double", 42D);
}
 
Example #22
Source File: TestFieldSettersNotNull.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NullValuesDissector registered and expects that none of
 * the listed fields ended up with a String value in the record.
 */
@Test
public void testString() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordString> parser = new Parser<>(TestRecordString.class)
        .setRootType("INPUT")
        .addDissector(new NullValuesDissector());

    TestRecordString record = parser.parse("Doesn't matter");

    record.noString("ANY:any")
          .noString("STRING:string")
          .noString("INT:int")
          .noString("LONG:long")
          .noString("FLOAT:float")
          .noString("DOUBLE:double");
}
 
Example #23
Source File: TestFieldSettersAlwaysSeparate.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Parses with only a NormalValuesDissector registered and expects each listed
 * field to have been delivered to the record as the Long 42.
 */
@Test
public void testLong() throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
    Parser<TestRecordLong> parser = new Parser<>(TestRecordLong.class)
        .setRootType("INPUT")
        .addDissector(new NormalValuesDissector());

    TestRecordLong record = parser.parse("Doesn't matter");

    record.expectLong("ANY:any", 42L)
          .expectLong("INT:int", 42L)
          .expectLong("LONG:long", 42L);
}
 
Example #24
Source File: HttpdLogRecord.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the setter of this record that the parser must call for one field, and
 * creates the data writer that will hold the values of that field.
 *
 * The setter is chosen dynamically: a wildcard field gets all three setWildcard variants,
 * otherwise the casts in {@code type} select the Double or Long setter, and everything
 * else is handled as a String (with a dedicated setter for "TIME.STAMP:" fields).
 *
 * @param parser          parser on which the parse targets are registered
 * @param mapWriter       writer from which the per-field writer is obtained
 * @param type            casts of the field; DOUBLE is checked before LONG
 * @param parserFieldName field name as used by the parser; may end with the parser wildcard
 * @param drillFieldName  field name handed to {@code mapWriter} when creating the writer
 * @throws NoSuchMethodException when the required setter does not exist on this class
 */
public void addField(final Parser<HttpdLogRecord> parser, final MapWriter mapWriter, final EnumSet<Casts> type, final String parserFieldName, final String drillFieldName) throws NoSuchMethodException {
  final Class<?> self = this.getClass();

  // Wildcard field: register all three setter variants and store a map writer
  // under the field name without the trailing wildcard.
  if (parserFieldName.endsWith(HttpdParser.PARSER_WILDCARD)) {
    final String cleanName = parserFieldName.substring(0, parserFieldName.length() - HttpdParser.PARSER_WILDCARD.length());
    LOG.debug("Adding WILDCARD parse target: {} as {}, with field name: {}", parserFieldName, cleanName, drillFieldName);
    parser.addParseTarget(self.getMethod("setWildcard", String.class, String.class), parserFieldName);
    parser.addParseTarget(self.getMethod("setWildcard", String.class, Double.class), parserFieldName);
    parser.addParseTarget(self.getMethod("setWildcard", String.class, Long.class), parserFieldName);
    wildcards.put(cleanName, mapWriter.map(drillFieldName));
    return;
  }

  if (type.contains(Casts.DOUBLE)) {
    LOG.debug("Adding DOUBLE parse target: {}, with field name: {}", parserFieldName, drillFieldName);
    parser.addParseTarget(self.getMethod("set", String.class, Double.class), parserFieldName);
    doubles.put(parserFieldName, mapWriter.float8(drillFieldName));
    return;
  }

  if (type.contains(Casts.LONG)) {
    LOG.debug("Adding LONG parse target: {}, with field name: {}", parserFieldName, drillFieldName);
    parser.addParseTarget(self.getMethod("set", String.class, Long.class), parserFieldName);
    longs.put(parserFieldName, mapWriter.bigInt(drillFieldName));
    return;
  }

  // Everything else arrives as a String; timestamps get their own setter and writer.
  LOG.debug("Adding STRING parse target: {}, with field name: {}", parserFieldName, drillFieldName);
  final boolean isTimestamp = parserFieldName.startsWith("TIME.STAMP:");
  if (isTimestamp) {
    parser.addParseTarget(self.getMethod("setTimestamp", String.class, String.class), parserFieldName);
    times.put(parserFieldName, mapWriter.timeStamp(drillFieldName));
  } else {
    parser.addParseTarget(self.getMethod("set", String.class, String.class), parserFieldName);
    strings.put(parserFieldName, mapWriter.varChar(drillFieldName));
  }
}
 
Example #25
Source File: TestDissectUserAgent.java    From yauaa with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a URL embedded in the user agent string is extracted by the
 * UserAgentDissector and can then be dissected further into its URI parts.
 */
@Test
public void testExtractUrlFields() {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, "%t \"%{User-agent}i\"");
    parser.addDissector(new UserAgentDissector());

    final String informationUrl = "https://yauaa.basjes.nl:8080/something.html?aap=noot&mies=wim#zus";

    // The URL is appended in parentheses at the end of the user agent.
    final String userAgent =
        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) " +
        "AppleWebKit/537.36 (KHTML, like Gecko) " +
        "Chrome/41.0.2272.96 " +
        "Mobile Safari/537.36" +
        "(" + informationUrl + ")";

    final String logLine = "[10/Aug/2012:23:55:11 +0200] \"" + userAgent + "\"";

    DissectorTester tester = DissectorTester
        .create()
        .withParser(parser)
        .withInput(logLine);

    tester
        // Did we get the field
        .expect("HTTP.USERAGENT:request.user-agent", userAgent)

        // Basic dissections
        .expect("STRING:request.user-agent.device_class", "Phone")
        .expect("STRING:request.user-agent.agent_name_version", "Chrome 41.0.2272.96")
        .expect("HTTP.URI:request.user-agent.agent_information_url", informationUrl)

        // Further extractions from the URI we found
        .expect("HTTP.PROTOCOL:request.user-agent.agent_information_url.protocol", "https")
        .expect("HTTP.HOST:request.user-agent.agent_information_url.host", "yauaa.basjes.nl")
        .expect("HTTP.PORT:request.user-agent.agent_information_url.port", "8080")
        .expect("HTTP.PATH:request.user-agent.agent_information_url.path", "/something.html")
        .expect("HTTP.QUERYSTRING:request.user-agent.agent_information_url.query", "&aap=noot&mies=wim")
        .expect("STRING:request.user-agent.agent_information_url.query.aap", "noot")
        .expect("STRING:request.user-agent.agent_information_url.query.mies", "wim")
        .expect("HTTP.REF:request.user-agent.agent_information_url.ref", "zus")
        .checkExpectations();
}
 
Example #26
Source File: TestCase.java    From logparser with Apache License 2.0 5 votes vote down vote up
public static Parser<TestRecord> createTestParser() throws NoSuchMethodException {
    Parser<TestRecord> parser = new HttpdLoglineParser<>(TestRecord.class, getLogFormat());

    parser.addDissector(new nl.basjes.parse.httpdlog.dissectors.ScreenResolutionDissector());

    // Type remappings, each as { field path, new type }.
    final String[][] remappings = {
        {"request.firstline.uri.query.g", "HTTP.URI"},
        {"request.firstline.uri.query.r", "HTTP.URI"},
        {"request.firstline.uri.query.s", "SCREENRESOLUTION"},
    };
    for (String[] remapping : remappings) {
        parser.addTypeRemapping(remapping[0], remapping[1]);
    }

    // All target tables below hold { setter name on TestRecord, requested field }.
    final String[][] requestTargets = {
        {"setConnectionClientHost", "IP:connection.client.host"},
        {"setRequestReceiveTime",   "TIME.STAMP:request.receive.time"},
        {"setReferrer",             "STRING:request.firstline.uri.query.g.query.promo"},
        {"setScreenResolution",     "STRING:request.firstline.uri.query.s"},
        {"setScreenWidth",          "SCREENWIDTH:request.firstline.uri.query.s.width"},
        {"setScreenHeight",         "SCREENHEIGHT:request.firstline.uri.query.s.height"},
        {"setGoogleQuery",          "STRING:request.firstline.uri.query.r.query.blabla"},
        {"setBui",                  "HTTP.COOKIE:request.cookies.bui"},
        {"setUseragent",            "HTTP.USERAGENT:request.user-agent"},
    };
    for (String[] target : requestTargets) {
        parser.addParseTarget(target[0], target[1]);
    }

    // ASN / ISP fields, registered after the ISP dissector is added.
    parser.addDissector(new GeoIPISPDissector(ISP_TEST_MMDB));
    final String[][] ispTargets = {
        {"setAsnNumber",            "ASN:connection.client.host.asn.number"},
        {"setAsnOrganization",      "STRING:connection.client.host.asn.organization"},
        {"setIspName",              "STRING:connection.client.host.isp.name"},
        {"setIspOrganization",      "STRING:connection.client.host.isp.organization"},
    };
    for (String[] target : ispTargets) {
        parser.addParseTarget(target[0], target[1]);
    }

    // Location fields, registered after the city dissector is added.
    parser.addDissector(new GeoIPCityDissector(CITY_TEST_MMDB));
    final String[][] cityTargets = {
        {"setContinentName",        "STRING:connection.client.host.continent.name"},
        {"setContinentCode",        "STRING:connection.client.host.continent.code"},
        {"setCountryName",          "STRING:connection.client.host.country.name"},
        {"setCountryIso",           "STRING:connection.client.host.country.iso"},
        {"setSubdivisionName",      "STRING:connection.client.host.subdivision.name"},
        {"setSubdivisionIso",       "STRING:connection.client.host.subdivision.iso"},
        {"setCityName",             "STRING:connection.client.host.city.name"},
        {"setPostalCode",           "STRING:connection.client.host.postal.code"},
        {"setLocationLatitude",     "STRING:connection.client.host.location.latitude"},
        {"setLocationLongitude",    "STRING:connection.client.host.location.longitude"},
    };
    for (String[] target : cityTargets) {
        parser.addParseTarget(target[0], target[1]);
    }

    return parser;
}
 
Example #27
Source File: TestParserDoFnInline.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a pipeline in which a DoFn defined inline parses a single log line and
 * asserts that the output is exactly one fully filled TestRecord.
 */
@Test
public void testInlineDefinition() {
    List<String> logLines = Collections.singletonList(TestCase.getInputLine());

    // Apply Create, passing the list and the coder, to create the PCollection.
    PCollection<String> input = pipeline.apply(Create.of(logLines)).setCoder(StringUtf8Coder.of());

    // The parser is created in setup() rather than when the DoFn is constructed.
    DoFn<String, TestRecord> parseLine = new DoFn<String, TestRecord>() {
        private Parser<TestRecord> parser;

        @Setup
        public void setup() throws NoSuchMethodException {
            parser = TestCase.createTestParser();
        }

        @ProcessElement
        public void processElement(ProcessContext context) throws InvalidDissectorException, MissingDissectorsException, DissectionFailure {
            context.output(parser.parse(context.element()));
        }
    };

    PCollection<TestRecord> filledTestRecords =
        input.apply("Extract Elements from logline", ParDo.of(parseLine));

    TestRecord fullyFilled = new TestRecord().setFullValid();

    PAssert.that(filledTestRecords).containsInAnyOrder(fullyFilled);

    pipeline.run().waitUntilFinish();
}
 
Example #28
Source File: Main.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates the parser on one hard-coded log format and log line: first logs every
 * path that can be extracted for the format, then parses the line into a MyRecord and
 * logs the resulting record.
 */
private void run() throws InvalidDissectorException, MissingDissectorsException, NoSuchMethodException, DissectionFailure {

        // This format and logline originate from here:
        // https://stackoverflow.com/questions/20349184/java-parse-log-file
        String logformat = "%t %u [%D %h %{True-Client-IP}i %{UNIQUE_ID}e %r] %{Cookie}i %s \"%{User-Agent}i\" \"%{host}i\" %l %b %{Referer}i";
        // A single logline; the literal is only split across source lines to limit the line width.
        String logline = "[02/Dec/2013:14:10:30 -0000] - [52075 10.102.4.254 177.43.52.210 UpyU1gpmBAwAACfd5W0AAAAW GET /SS14-VTam-ny_019.j" +
                "pg.rendition.zoomable.jpg HTTP/1.1] hsfirstvisit=http%3A%2F%2Fwww.domain.com%2Fen-us||1372268254000; _opt_vi_3FNG8DZU=F870" +
                "DCFD-CBA4-4B6E-BB58-4605A78EE71A; __ptca=145721067.0aDxsZlIuM48.1372279055.1379945057.1379950362.9; __ptv_62vY4e=0aDxsZlIu" +
                "M48; __pti_62vY4e=0aDxsZlIuM48; __ptcz=145721067.1372279055.1.0.ptmcsr=(direct)|ptmcmd=(none)|ptmccn=(direct); __hstc=1457" +
                "21067.b86362bb7a1d257bfa2d1fb77e128a85.1372268254968.1379934256743.1379939561848.9; hubspotutk=b86362bb7a1d257bfa2d1fb77e1" +
                "28a85; USER_GROUP=julinho%3Afalse; has_js=1; WT_FPC=id=177.43.52.210-1491335248.30301337:lv=1385997780893:ss=1385997780893" +
                "; dtCookie=1F2E0E1037589799D8D503EB8CFA12A1|_default|1; RM=julinho%3A5248423ad3fe062f06c54915e6cde5cb45147977; wcid=UpyKsQ" +
                "pmBAwAABURyNoAAAAS%3A35d8227ba1e8a9a9cebaaf8d019a74777c32b4c8; Carte::KerberosLexicon_getWGSN=82ae3dcd1b956288c3c86bdbed6e" +
                "bcc0fd040e1e; UserData=Username%3AJULINHO%3AHomepage%3A1%3AReReg%3A0%3ATrialist%3A0%3ALanguage%3Aen%3ACcode%3Abr%3AForceRe" +
                "Reg%3A0; UserID=1356673%3A12345%3A1234567890%3A123%3Accode%3Abr; USER_DATA=1356673%3Ajulinho%3AJulio+Jose%3Ada+Silva%3Ajul" +
                "inho%40tecnoblu.com.br%3A0%3A1%3Aen%3Abr%3A%3AWGSN%3A1385990833.81925%3A82ae3dcd1b956288c3c86bdbed6ebcc0fd040e1e; MODE=FON" +
                "TIS; SECTION=%2Fcontent%2Fsection%2Fhome.html; edge_auth=ip%3D177.43.52.210~expires%3D1385994522~access%3D%2Fapps%2F%2A%21" +
                "%2Fbin%2F%2A%21%2Fcontent%2F%2A%21%2Fetc%2F%2A%21%2Fhome%2F%2A%21%2Flibs%2F%2A%21%2Freport%2F%2A%21%2Fsection%2F%2A%21%2Fw" +
                "gsn%2F%2A~md5%3D90e73ee10161c1afacab12c6ea30b4ef; __utma=94539802.1793276213.1372268248.1385572390.1385990581.16; __utmb=9" +
                "4539802.52.9.1385991739764; __utmc=94539802; __utmz=94539802.1372268248.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none);" +
                " WT_FPC=id=177.43.52.210-1491335248.30301337:lv=1386000374581:ss=1386000374581; dtPC=-; NSC_wtfswfs_xfcgbsn40-41=ffffffff0" +
                "96e1a1d45525d5f4f58455e445a4a423660; akamai-edge=5ac6e5b3d0bbe2ea771bb2916d8bab34ea222a6a 200 \"Mozilla/5.0 (Windows NT 6." +
                "2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36\" \"www.domain.com\" - 463952 http://ww" +
                "w.domain.com/content/report/shows/New_York/KSHK/trip/s_s_14_ny_ww/sheers.html";

        // First log everything that could be extracted for this logformat.
        printAllPossibles(logformat);

        // Then parse the line into a MyRecord and log the record between two separator lines.
        Parser<MyRecord> parser = new HttpdLoglineParser<>(MyRecord.class, logformat);
        MyRecord record = new MyRecord();

        LOG.info("==================================================================================");
        parser.parse(record, logline);
        LOG.info(record.toString());
        LOG.info("==================================================================================");
    }
 
Example #29
Source File: TestCase.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the parser shared by the tests: an {@code HttpdLoglineParser} for the format returned
 * by {@code getLogFormat()} that fills {@code TestRecord} instances.
 *
 * <p>Setup happens in three stages, each registering a dissector followed by the setters that
 * receive values: the screen resolution dissector (with the {@code g}, {@code r} and {@code s}
 * query parameters re-typed), then the GeoIP ISP dissector, then the GeoIP City dissector.
 *
 * @return the configured parser
 * @throws NoSuchMethodException propagated from the parser configuration calls
 *         (presumably when a named setter cannot be found on {@code TestRecord} - confirm)
 */
public static Parser<TestRecord> createTestParser() throws NoSuchMethodException {
    // Each row is { setter method name, "TYPE:field.path" }; rows are registered top to bottom.
    final String[][] requestTargets = {
        {"setConnectionClientHost", "IP:connection.client.host"},
        {"setRequestReceiveTime",   "TIME.STAMP:request.receive.time"},
        {"setReferrer",             "STRING:request.firstline.uri.query.g.query.promo"},
        {"setScreenResolution",     "STRING:request.firstline.uri.query.s"},
        {"setScreenWidth",          "SCREENWIDTH:request.firstline.uri.query.s.width"},
        {"setScreenHeight",         "SCREENHEIGHT:request.firstline.uri.query.s.height"},
        {"setGoogleQuery",          "STRING:request.firstline.uri.query.r.query.blabla"},
        {"setBui",                  "HTTP.COOKIE:request.cookies.bui"},
        {"setUseragent",            "HTTP.USERAGENT:request.user-agent"},
    };
    final String[][] ispTargets = {
        {"setAsnNumber",            "ASN:connection.client.host.asn.number"},
        {"setAsnOrganization",      "STRING:connection.client.host.asn.organization"},
        {"setIspName",              "STRING:connection.client.host.isp.name"},
        {"setIspOrganization",      "STRING:connection.client.host.isp.organization"},
    };
    final String[][] cityTargets = {
        {"setContinentName",        "STRING:connection.client.host.continent.name"},
        {"setContinentCode",        "STRING:connection.client.host.continent.code"},
        {"setCountryName",          "STRING:connection.client.host.country.name"},
        {"setCountryIso",           "STRING:connection.client.host.country.iso"},
        {"setSubdivisionName",      "STRING:connection.client.host.subdivision.name"},
        {"setSubdivisionIso",       "STRING:connection.client.host.subdivision.iso"},
        {"setCityName",             "STRING:connection.client.host.city.name"},
        {"setPostalCode",           "STRING:connection.client.host.postal.code"},
        {"setLocationLatitude",     "STRING:connection.client.host.location.latitude"},
        {"setLocationLongitude",    "STRING:connection.client.host.location.longitude"},
    };

    Parser<TestRecord> testParser = new HttpdLoglineParser<>(TestRecord.class, getLogFormat());

    // Stage 1: screen resolution dissector, type remappings and the request-level setters.
    testParser.addDissector(new nl.basjes.parse.httpdlog.dissectors.ScreenResolutionDissector());
    testParser.addTypeRemapping("request.firstline.uri.query.g", "HTTP.URI");
    testParser.addTypeRemapping("request.firstline.uri.query.r", "HTTP.URI");
    testParser.addTypeRemapping("request.firstline.uri.query.s", "SCREENRESOLUTION");
    for (String[] target : requestTargets) {
        testParser.addParseTarget(target[0], target[1]);
    }

    // Stage 2: ASN / ISP values, backed by the ISP test database.
    testParser.addDissector(new GeoIPISPDissector(ISP_TEST_MMDB));
    for (String[] target : ispTargets) {
        testParser.addParseTarget(target[0], target[1]);
    }

    // Stage 3: geographic values, backed by the City test database.
    testParser.addDissector(new GeoIPCityDissector(CITY_TEST_MMDB));
    for (String[] target : cityTargets) {
        testParser.addParseTarget(target[0], target[1]);
    }

    return testParser;
}
 
Example #30
Source File: TestParserMapFunctionInline.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the test input line through a map function defined inline and verifies that exactly
 * one record comes out and that it equals {@code new TestRecord().setFullValid()}.
 */
@Test
public void testInlineDefinition() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // The map function creates its parser in open(), not at construction time.
    final RichMapFunction<String, TestRecord> loglineToRecord = new RichMapFunction<String, TestRecord>() {
        private Parser<TestRecord> lineParser;

        @Override
        public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
            lineParser = TestCase.createTestParser();
        }

        @Override
        public TestRecord map(String logline) throws Exception {
            return lineParser.parse(logline);
        }
    };

    DataSet<String> loglines = env.fromElements(TestCase.getInputLine());
    DataSet<TestRecord> parsedRecords = loglines
        .map(loglineToRecord)
        .name("Extract Elements from logline");

    // Show the parsed records in the test output before checking them.
    parsedRecords.print();

    List<TestRecord> collected = parsedRecords.collect();

    assertEquals(1, collected.size());
    assertEquals(new TestRecord().setFullValid(), collected.get(0));
}