from pyspark import SparkContext

def mapper(line):
    words = line.split()
    return [(word, 1) for word in words]

def reducer(word_count):
    return word_count.reduceByKey(lambda a, b: a + b)

if __name__ == "__main__":
    sc = SparkContext("local", "WordCount")

    # Read the input file
    input_file = "path/to/your/input.txt"
    lines = sc.textFile(input_file)

    # Apply the mapper
    word_counts = lines.flatMap(mapper)

    # Apply the reducer
    final_counts = reducer(word_counts)

    # Collect and print results
    results = final_counts.collect()

    for word, count in results:
        print(f"{word}: {count}")

    # Stop the Spark context
    sc.stop()
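
For a quick local check without creating an input file, the same pipeline can be driven from an in-memory collection via sc.parallelize. This is an illustrative sketch, not part of the original script; the sample sentences are made up, and it assumes PySpark is installed and runnable in local mode.

from pyspark import SparkContext

# Self-contained variant: replaces textFile() with an in-memory RDD
# so no input file is needed (the sample data below is made up).
if __name__ == "__main__":
    sc = SparkContext("local", "WordCountTest")

    lines = sc.parallelize([
        "the quick brown fox",
        "the lazy dog",
    ])

    counts = (lines
              .flatMap(lambda line: [(w, 1) for w in line.split()])
              .reduceByKey(lambda a, b: a + b))

    for word, count in counts.collect():
        print(f"{word}: {count}")  # e.g. "the: 2", "quick: 1", ...

    sc.stop()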