import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.commons.lang.time.StopWatch;import java.util.ArrayList;import java.util.List;public class Prime { //Method to calculate and count the prime numbers public List<Integer> countPrime(int n){ List<Integer> primes = new ArrayList<>(); for (int i = 2; i < n; i++){ boolean isPrime = true; //check if the number is prime or not for (int j = 2; j < i; j++){ if (i % j == 0){ isPrime = false; break; // exit the inner for loop } } //add the primes into the List if (isPrime){ primes.add(i); } } return primes; } //Main method to run the program public static void main(String[]args){ StopWatch watch = new StopWatch(); watch.start(); //creating javaSparkContext object SparkConf conf = new SparkConf().setAppName("haha").setMaster("local[2]"); JavaSparkContext sc = new JavaSparkContext(conf); //new prime object Prime prime = new Prime(); //prime.countPrime(1000000); //parallelize the collection JavaRDD<Integer> rdd = sc.parallelize(prime.countPrime(1000000),12); long count = rdd.filter(e -> e == 2|| e % 2 != 0).count(); //Stopping the execution time and printing the results watch.stop(); System.out.println("Total time took to run the process is " + watch); System.out.println("The number of prime between 0 to 1000000 is " + count); sc.stop(); }}Hi there , i have this following code which parallelize an algorithm. The algorithm counts the number of prime in a given range. But the code is only parallelizing the list of primes but not the process itself. How can modify the code to parallelize process of finding the primes?
1 Answer
It's an order of operations issue - you're running `prime.countPrime` before you've created your Spark RDD. Spark runs in parallel the operations that are defined within the RDD object's `map`, `reduce`, `filter`, etc. operations. You need to rethink your approach:
1. Use `sc.range(1, 1000000, 1, 12)` to create an RDD of all integers from 1 to 1,000,000.
2. Create an `isPrime(int n)` method to evaluate if a given integer is prime.
3. `filter` your RDD on the condition of your `isPrime` method (this is the part that will execute in parallel).
4. `count` the filtered RDD.
Comments
Explore related questions
See similar questions with these tags.

