WORD AND CHARACTER COUNTER - Analyzing text file statistics

Python
#!/usr/bin/env python3
"""
WORD AND CHARACTER COUNTER - Analyzing text file statistics
Demonstrates counting words, lines, and characters
"""

import os
import tempfile
import string

# Banner for the demo run.
print("=" * 60)
print("WORD AND CHARACTER COUNTER - Text Analysis")
print("=" * 60)

# Shared system temp directory; scratch files for the demo live here.
temp_dir = tempfile.gettempdir()

# Create sample text file
# A fixed name such as "sample.txt" in the shared temp directory could
# overwrite an unrelated file of the same name (which the cleanup step would
# then delete), so let mkstemp pick a unique, freshly created file instead.
sample_fd, text_file = tempfile.mkstemp(
    prefix="sample_", suffix=".txt", dir=temp_dir
)
# Wrap the already-open descriptor so it is closed when the block exits.
with os.fdopen(sample_fd, 'w') as f:
    f.write("""Python is a high-level programming language.
It is known for its simplicity and readability.
Python is widely used in data science, web development, and automation.
The language was created by Guido van Rossum in 1991.
Python emphasizes code readability with its notable use of whitespace.
""")

print("Sample text file created.\n")

# Example 1: Count lines
print("1. Counting Lines")
print("-" * 40)
# Iterating a text file yields one item per line, so tallying the items
# gives the line count without loading the whole file into memory.
with open(text_file, 'r') as f:
    line_count = sum(1 for _ in f)

print(f"Total lines: {line_count}")

# Example 2: Count characters
print("\n2. Counting Characters")
print("-" * 40)
# First pass: every character of every line, trailing newline included.
with open(text_file, 'r') as f:
    char_count = sum(len(raw_line) for raw_line in f)

print(f"Total characters (including newlines): {char_count}")

# Count without newlines
# Second pass: drop the line terminator before measuring each line.
with open(text_file, 'r') as f:
    char_count_no_newline = sum(len(raw_line.rstrip('\n')) for raw_line in f)

print(f"Total characters (excluding newlines): {char_count_no_newline}")

# Example 3: Count words
print("\n3. Counting Words")
print("-" * 40)
# str.split() with no argument splits on any run of whitespace, so each
# resulting token is treated as one word.
with open(text_file, 'r') as f:
    word_count = sum(len(text_line.split()) for text_line in f)

print(f"Total words: {word_count}")

# Example 4: Average word length
print("\n4. Average Word Length")
print("-" * 40)
total_chars = 0
total_words = 0

with open(text_file, 'r') as f:
    for line in f:
        for word in line.split():
            # Trim punctuation from both ends only, so "language." is
            # measured as "language" while "high-level" keeps its hyphen.
            clean_word = word.strip(string.punctuation)
            # A token made up solely of punctuation (e.g. "-" or "...") is
            # not a word; counting it would add a zero-length entry and
            # drag the average down.
            if not clean_word:
                continue
            total_chars += len(clean_word)
            total_words += 1

# Guard against a file that contains no words at all.
avg_length = total_chars / total_words if total_words else 0
print(f"Average word length: {avg_length:.2f} characters")

# Example 5: Count specific word occurrences
print("\n5. Count Specific Word Occurrences")
print("-" * 40)
target_word = "python"

# Lower-case each line for a case-insensitive match, then compare every
# token (with surrounding punctuation trimmed) against the target.
with open(text_file, 'r') as f:
    count = sum(
        1
        for text_line in f
        for token in text_line.lower().split()
        if token.strip(string.punctuation) == target_word
    )

print(f"'{target_word}' appears {count} times")

# Example 6: Word frequency analysis
print("\n6. Word Frequency Analysis")
print("-" * 40)
# Maps each lower-cased, punctuation-trimmed word to its occurrence count.
word_freq = {}

with open(text_file, 'r') as f:
    for text_line in f:
        for token in text_line.lower().split():
            key = token.strip(string.punctuation)
            if not key:
                # Token was nothing but punctuation; not a word.
                continue
            if key in word_freq:
                word_freq[key] += 1
            else:
                word_freq[key] = 1

# Show top 10 most common words
print("Top 10 most common words:")
# The sort is stable, so words with equal counts stay in first-seen order.
sorted_words = sorted(word_freq.items(), key=lambda pair: pair[1], reverse=True)
for rank, (word, freq) in enumerate(sorted_words[:10], start=1):
    print(f"  {rank:2d}. {word:15s}: {freq}")

# Example 7: Character frequency
print("\n7. Character Frequency (Letters Only)")
print("-" * 40)
# Maps each lower-cased alphabetic character to its occurrence count.
char_freq = {}

with open(text_file, 'r') as f:
    # Flatten the file into a stream of lower-cased letters, skipping
    # digits, whitespace and punctuation.
    letters = (
        symbol
        for text_line in f
        for symbol in text_line.lower()
        if symbol.isalpha()
    )
    for symbol in letters:
        char_freq[symbol] = 1 + char_freq.get(symbol, 0)

# Show top 10 characters
# The sort is stable, so letters with equal counts stay in first-seen order.
sorted_chars = sorted(char_freq.items(), key=lambda pair: pair[1], reverse=True)
print("Top 10 most common letters:")
for rank, (char, freq) in enumerate(sorted_chars[:10], start=1):
    print(f"  {rank:2d}. '{char}': {freq}")

# Example 8: Line statistics
print("\n8. Line Length Statistics")
print("-" * 40)

# Length of every line with its trailing newline removed.
with open(text_file, 'r') as f:
    line_lengths = [len(line.rstrip('\n')) for line in f]

# An empty file yields no lines: min()/max() would raise ValueError and the
# average would divide by zero, so fall back to 0 for all three figures.
min_length = min(line_lengths, default=0)
max_length = max(line_lengths, default=0)
avg_length = sum(line_lengths) / len(line_lengths) if line_lengths else 0

print(f"Minimum line length: {min_length} characters")
print(f"Maximum line length: {max_length} characters")
print(f"Average line length: {avg_length:.2f} characters")

# Example 9: Count paragraphs (blank line separated)
print("\n9. Counting Paragraphs")
print("-" * 40)
paragraph_count = 0
# Treat the start of the file as if it followed a blank line, so the first
# non-blank line opens the first paragraph.
previous_blank = True

with open(text_file, 'r') as f:
    for text_line in f:
        current_blank = not text_line.strip()
        # A paragraph starts at each blank -> non-blank transition.
        if previous_blank and not current_blank:
            paragraph_count += 1
        previous_blank = current_blank

print(f"Total paragraphs: {paragraph_count}")

# Example 10: Complete statistics report
print("\n10. Complete Statistics Report")
print("-" * 40)

# Running totals for the whole file. 'characters' includes newline
# characters, and 'spaces' counts every whitespace character (str.isspace),
# so newlines are included there as well.
stats = {
    'lines': 0,
    'words': 0,
    'characters': 0,
    'letters': 0,
    'digits': 0,
    'spaces': 0,
    'punctuation': 0
}

with open(text_file, 'r') as f:
    for line in f:
        stats['lines'] += 1
        stats['words'] += len(line.split())
        stats['characters'] += len(line)

        # Each character lands in at most one category; a character that
        # matches none of the tests only contributes to 'characters'.
        for char in line:
            if char.isalpha():
                stats['letters'] += 1
            elif char.isdigit():
                stats['digits'] += 1
            elif char.isspace():
                stats['spaces'] += 1
            elif char in string.punctuation:
                stats['punctuation'] += 1

# Write the report to a uniquely named file. A fixed name such as
# "statistics.txt" in the shared temp directory could overwrite an unrelated
# file of the same name, which the cleanup step would then delete.
report_fd, report_file = tempfile.mkstemp(
    prefix="statistics_", suffix=".txt", dir=temp_dir
)
# Wrap the already-open descriptor so it is closed when the block exits.
with os.fdopen(report_fd, 'w') as f:
    f.write("TEXT STATISTICS REPORT\n")
    f.write("=" * 50 + "\n")
    f.write(f"Total Lines:       {stats['lines']:>8}\n")
    f.write(f"Total Words:       {stats['words']:>8}\n")
    f.write(f"Total Characters:  {stats['characters']:>8}\n")
    f.write(f"Letters:           {stats['letters']:>8}\n")
    f.write(f"Digits:            {stats['digits']:>8}\n")
    f.write(f"Spaces:            {stats['spaces']:>8}\n")
    f.write(f"Punctuation:       {stats['punctuation']:>8}\n")
    f.write("=" * 50 + "\n")

# Read the report back from disk and show it on the console.
with open(report_file, 'r') as f:
    print(f.read())

# Cleanup
# Attempt the removal directly instead of checking os.path.exists() first:
# the file could disappear between the check and the remove, so handling
# the "already gone" case here is the only race-free option.
for scratch_path in (text_file, report_file):
    try:
        os.remove(scratch_path)
    except FileNotFoundError:
        # Nothing left to delete for this path.
        pass

# Closing summary: a ruled-off list of the techniques shown above.
rule = "=" * 60
key_points = (
    "  - Count lines by iterating",
    "  - Count words with split()",
    "  - Count characters with len()",
    "  - Use dict for frequency analysis",
    "  - Can generate detailed statistics",
)

print("\n" + rule)
print("Key Points:")
for point in key_points:
    print(point)
print(rule)