import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;
import java.io.*;

/**
 * Counts how many keys built from a whitespace-delimited text file also occur as whole lines
 * of a gzip-compressed file.
 *
 * <p>The text file is read row by row. For each row the first two tokens are joined with
 * {@code ':'} to form a key, the third token is ignored, and the row is kept only when its
 * fourth token parses as an integer greater than 5. Kept keys are stored in {@link #map1};
 * the gzip file is then scanned and every line equal to a not-yet-matched key increments
 * {@link #count}. To bound memory, the table is probed and replaced with a fresh one each
 * time a fixed number of rows has been accepted.
 *
 * <p>All state is held in static fields and no synchronization is performed.
 *
 * @author asingh
 */
public class HashJoin2 {

    /** Number of probe-file lines between progress reports on stderr. */
    private static final int PROBE_REPORT_INTERVAL = 29999;

    /** Number of accepted rows after which the current table is probed and discarded. */
    private static final int BUILD_CHUNK_SIZE = 49999999;

    /** Rows whose fourth token is less than or equal to this value are not stored. */
    private static final int VALUE_THRESHOLD = 5;

    /** Number of whitespace-separated tokens a row must have to be usable. */
    private static final int REQUIRED_TOKENS = 4;

    /** Map value for a key that has not been seen in the probe file yet. */
    private static final Integer UNMATCHED = 1;

    /** Map value for a key that has already been counted once. */
    private static final Integer MATCHED = 0;

    /** Current table of keys; the value is 1 until the key is matched, then 0. */
    public static HashMap<String, Integer> map1 = null;

    /** Running total of matched keys; never reset by this class. */
    public static long count = 0;

    /**
     * Scans a gzip-compressed file line by line and counts each line that equals a key in
     * {@link #map1} which has not been matched before. A matched key is flagged in the map
     * so that repeated occurrences within the same table are counted only once.
     *
     * <p>I/O and runtime failures are reported on stderr and do not propagate.
     *
     * @param filename2 path of the gzip-compressed file to scan
     */
    public static void runMatch(String filename2) {
        if (map1 == null) {
            // Previously surfaced as an opaque "Error: null" from a NullPointerException.
            System.err.println("Error: hash table has not been built");
            return;
        }
        // Each stream is its own resource so the file handle is released even when the
        // GZIP header check fails or reading throws part-way through.
        try (InputStream raw = new FileInputStream(filename2);
                InputStream unzipped = new GZIPInputStream(raw);
                BufferedReader br = new BufferedReader(new InputStreamReader(unzipped))) {
            long linesRead = 0;
            String strLine;
            while ((strLine = br.readLine()) != null) {
                linesRead++;
                if (linesRead % PROBE_REPORT_INTERVAL == 0) {
                    System.err.println(
                            "Gone through " + linesRead + "rows, num matches = " + count);
                }
                // A single lookup covers both "key present" and "not matched yet".
                if (UNMATCHED.equals(map1.get(strLine))) {
                    count++;
                    map1.put(strLine, MATCHED);
                }
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Builds the key table from one file and probes it against the other, then prints the
     * total number of matches to stdout. Every accepted key is also echoed to stdout.
     *
     * <p>If the first file is larger (in bytes) than the second, the two names are swapped
     * before processing.
     * NOTE(review): after a swap the originally-second file is read as plain text and the
     * originally-first file as gzip, which only works if both files exist in both roles;
     * confirm whether the swap is intended.
     *
     * @param args {@code args[0]} is the plain-text file, {@code args[1]} the gzip file
     */
    public static void main(String[] args) {
        if (args == null || args.length < 2) {
            System.err.println("Usage: java HashJoin2 <text-file> <gzip-file>");
            return;
        }
        String filename1 = args[0];
        String filename2 = args[1];
        if (new File(filename1).length() > new File(filename2).length()) {
            String temp = filename1;
            filename1 = filename2;
            filename2 = temp;
        }

        try (InputStream fstream = new FileInputStream(filename1);
                BufferedReader br = new BufferedReader(new InputStreamReader(fstream))) {
            map1 = new HashMap<>();
            long numLines = 0;
            int hashTableCount = 0;
            String strLine;
            while ((strLine = br.readLine()) != null) {
                String newString = acceptedKey(strLine);
                if (newString == null) {
                    continue;
                }
                numLines++;
                System.out.println(newString);
                map1.put(newString, UNMATCHED);
                if (numLines % BUILD_CHUNK_SIZE == 0) {
                    // Table is full: probe it now and start over with an empty one.
                    hashTableCount++;
                    System.err.println("Done Creating HashTable " + hashTableCount);
                    runMatch(filename2);
                    map1 = new HashMap<>();
                }
            }
            // Probe whatever is left in the last (possibly partial) table.
            runMatch(filename2);
        } catch (IOException | RuntimeException e) {
            System.err.println("Error: " + e.getMessage());
        }
        System.out.println("Total Matches are " + count);
    }

    /**
     * Extracts the key from one row, or returns {@code null} when the row must be skipped.
     *
     * <p>A row is skipped when it has fewer than four tokens, when its fourth token is not
     * an integer, or when that integer is less than or equal to the threshold. Malformed
     * non-blank rows are reported on stderr instead of aborting the whole run.
     */
    private static String acceptedKey(String line) {
        StringTokenizer tokens = new StringTokenizer(line);
        int available = tokens.countTokens();
        if (available < REQUIRED_TOKENS) {
            if (available > 0) {
                System.err.println("Skipping malformed line: " + line);
            }
            return null;
        }
        String key = tokens.nextToken() + ":" + tokens.nextToken();
        tokens.nextToken(); // third token is not used
        int value;
        try {
            value = Integer.parseInt(tokens.nextToken());
        } catch (NumberFormatException e) {
            System.err.println("Skipping malformed line: " + line);
            return null;
        }
        return value <= VALUE_THRESHOLD ? null : key;
    }
}