from pyspark import SparkContext
def mapper(line):
    words = line.split()
    return [(word, 1) for word in words]

def reducer(word_count):
    return word_count.reduceByKey(lambda a, b: a + b)

if __name__ == "__main__":
    sc = SparkContext("local", "WordCount")

    # Read the input file
    input_file = "path/to/your/input.txt"
    lines = sc.textFile(input_file)

    # Apply the mapper: emit one (word, 1) pair per word, flattened across lines
    word_counts = lines.flatMap(mapper)

    # Apply the reducer: sum the counts for each distinct word
    final_counts = reducer(word_counts)

    # Collect and print results
    results = final_counts.collect()
    for word, count in results:
        print(f"{word}: {count}")

    # Stop the Spark context
    sc.stop()
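For a quick local sanity check without an input file, a minimal sketch of the same pipeline is shown below, using sc.parallelize on an in-memory list of lines; the sample sentences and app name are illustrative, not part of the original script, and result ordering from collect() is not guaranteed.

from pyspark import SparkContext

# Standalone test sketch (assumed sample data); runs the same map/reduce steps in memory
sc = SparkContext("local", "WordCountTest")
lines = sc.parallelize(["hello world", "hello spark"])
counts = (lines
          .flatMap(lambda line: [(word, 1) for word in line.split()])
          .reduceByKey(lambda a, b: a + b))
print(counts.collect())  # e.g. [('hello', 2), ('world', 1), ('spark', 1)] (order may vary)
sc.stop()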