{ "title": "Naive Bayes Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions to test and deepen your understanding of Naive Bayes classifiers, from fundamental concepts to advanced real-world applications and challenges.", "questions": [ { "id": 1, "questionText": "What is the core assumption of Naive Bayes?", "options": [ "All classes have equal probability", "The dataset is balanced", "Features are correlated", "Features are independent given the class label" ], "correctAnswerIndex": 3, "explanation": "Naive Bayes assumes conditional independence of features given the class, which simplifies probability computation." }, { "id": 2, "questionText": "Which theorem is Naive Bayes based on?", "options": [ "Markov Theorem", "Pythagoras Theorem", "Central Limit Theorem", "Bayes' Theorem" ], "correctAnswerIndex": 3, "explanation": "Naive Bayes uses Bayes’ Theorem to compute posterior probabilities for classification." }, { "id": 3, "questionText": "In Naive Bayes, what is the 'prior probability'?", "options": [ "Probability of each class before observing features", "Probability of features given the class", "Probability of misclassification", "Conditional probability of test data" ], "correctAnswerIndex": 0, "explanation": "The prior is the initial probability of each class based on the training dataset." }, { "id": 4, "questionText": "Which type of Naive Bayes is suitable for text data?", "options": [ "Gaussian Naive Bayes", "Bernoulli Naive Bayes", "Multinomial Naive Bayes", "Poisson Naive Bayes" ], "correctAnswerIndex": 2, "explanation": "Multinomial NB works well for text features, as it handles word frequencies." }, { "id": 5, "questionText": "Which Naive Bayes variant is used for binary features?", "options": [ "Gaussian Naive Bayes", "Bernoulli Naive Bayes", "Poisson Naive Bayes", "Multinomial Naive Bayes" ], "correctAnswerIndex": 1, "explanation": "Bernoulli NB models binary presence/absence features effectively." }, { "id": 6, "questionText": "In Gaussian Naive Bayes, features are assumed to follow which distribution?", "options": [ "Uniform distribution", "Normal (Gaussian) distribution", "Exponential distribution", "Poisson distribution" ], "correctAnswerIndex": 1, "explanation": "Gaussian NB models continuous features using a normal distribution." }, { "id": 7, "questionText": "What is 'likelihood' in Naive Bayes?", "options": [ "Probability of features given the class", "Posterior probability", "Prior probability", "Probability of the class given features" ], "correctAnswerIndex": 0, "explanation": "Likelihood is P(features|class) used in Bayes’ formula to compute posterior probability." }, { "id": 8, "questionText": "Which probability does Naive Bayes calculate to make predictions?", "options": [ "Prior probability only", "Posterior probability P(class|features)", "Feature probability only", "Joint probability of all classes" ], "correctAnswerIndex": 1, "explanation": "Naive Bayes computes the posterior probability for each class and selects the class with the highest value." }, { "id": 9, "questionText": "Why is it called 'Naive' Bayes?", "options": [ "Because it is simple to implement", "Because it assumes feature independence", "Because it only works on small datasets", "Because it ignores class labels" ], "correctAnswerIndex": 1, "explanation": "The method is 'naive' due to its strong assumption that features are independent given the class." 
}, { "id": 10, "questionText": "Which metric is commonly used to evaluate Naive Bayes classifiers?", "options": [ "R-squared value", "Euclidean distance", "Accuracy, Precision, Recall, F1-score", "Mean squared error" ], "correctAnswerIndex": 2, "explanation": "Classification metrics like accuracy, precision, recall, and F1-score are used to evaluate Naive Bayes performance." }, { "id": 11, "questionText": "Scenario: You have continuous features with Gaussian distribution. Which Naive Bayes variant is suitable?", "options": [ "Bernoulli Naive Bayes", "Gaussian Naive Bayes", "Multinomial Naive Bayes", "Poisson Naive Bayes" ], "correctAnswerIndex": 1, "explanation": "Gaussian NB models continuous features using the mean and variance of each class." }, { "id": 12, "questionText": "Scenario: Your dataset has counts of words per document. Which Naive Bayes is ideal?", "options": [ "Bernoulli Naive Bayes", "Multinomial Naive Bayes", "Gaussian Naive Bayes", "Poisson Naive Bayes" ], "correctAnswerIndex": 1, "explanation": "Multinomial NB handles discrete count data such as word frequencies." }, { "id": 13, "questionText": "Scenario: You have binary features indicating presence or absence. Which Naive Bayes type should you use?", "options": [ "Gaussian Naive Bayes", "Multinomial Naive Bayes", "Bernoulli Naive Bayes", "Poisson Naive Bayes" ], "correctAnswerIndex": 2, "explanation": "Bernoulli NB is suitable for binary features." }, { "id": 14, "questionText": "Which problem arises if a feature has zero probability in training data?", "options": [ "Likelihood is unaffected", "Accuracy increases", "Prior probability changes", "Posterior becomes zero, causing prediction failure" ], "correctAnswerIndex": 3, "explanation": "Zero probability leads to a posterior of zero. Laplace smoothing is used to avoid this." }, { "id": 15, "questionText": "What is Laplace smoothing used for in Naive Bayes?", "options": [ "To normalize features", "To handle zero probabilities", "To scale continuous features", "To reduce dimensionality" ], "correctAnswerIndex": 1, "explanation": "Laplace smoothing adds a small value to feature counts to avoid zero probabilities." }, { "id": 16, "questionText": "Scenario: You apply Naive Bayes to a spam detection problem. What is the target variable?", "options": [ "Document length", "Email class (spam or not spam)", "Feature importance", "Word frequency" ], "correctAnswerIndex": 1, "explanation": "The target variable is the class label to predict, e.g., spam or ham." }, { "id": 17, "questionText": "Scenario: In text classification, why do we use log probabilities in Naive Bayes?", "options": [ "To prevent underflow from multiplying many small probabilities", "To ignore irrelevant words", "To increase accuracy", "To normalize features" ], "correctAnswerIndex": 0, "explanation": "Log probabilities convert multiplication into addition, avoiding numerical underflow." }, { "id": 18, "questionText": "Which is a limitation of Naive Bayes?", "options": [ "Cannot handle categorical data", "Requires large datasets only", "Does not use prior probabilities", "Assumes feature independence which is often violated" ], "correctAnswerIndex": 3, "explanation": "The independence assumption may not hold, potentially reducing accuracy." 
}, { "id": 19, "questionText": "Which scenario favors Naive Bayes despite its independence assumption?", "options": [ "Complex regression tasks", "Time-series prediction", "Text classification", "Image classification with correlated pixels" ], "correctAnswerIndex": 2, "explanation": "Naive Bayes performs surprisingly well for text classification even when features are not fully independent." }, { "id": 20, "questionText": "Which term in Bayes’ theorem represents evidence?", "options": [ "P(features|class)", "P(class)", "P(class|features)", "P(features)" ], "correctAnswerIndex": 3, "explanation": "Evidence is P(features), used to normalize posterior probabilities." }, { "id": 21, "questionText": "Scenario: You want to classify news articles. Which preprocessing step helps Naive Bayes?", "options": [ "Ignoring word frequencies", "Tokenization and stop-word removal", "Adding irrelevant words", "Random feature shuffling" ], "correctAnswerIndex": 1, "explanation": "Tokenization and stop-word removal reduce noise and improve feature quality." }, { "id": 22, "questionText": "Scenario: You notice some features dominate predictions. What can help?", "options": [ "Add Laplace smoothing", "Increase k", "Remove prior probabilities", "Feature scaling or normalization" ], "correctAnswerIndex": 3, "explanation": "Scaling features ensures no single feature dominates posterior computation." }, { "id": 23, "questionText": "Which is a benefit of Naive Bayes?", "options": [ "Handles missing values automatically", "Works only on balanced datasets", "Fast to train and predict", "Always accurate" ], "correctAnswerIndex": 2, "explanation": "Naive Bayes is computationally efficient and works well with large datasets." }, { "id": 24, "questionText": "Scenario: Multinomial Naive Bayes is applied to short text documents. What could help?", "options": [ "TF-IDF feature weighting", "Use raw counts only", "Ignore feature scaling", "Shuffle classes randomly" ], "correctAnswerIndex": 0, "explanation": "TF-IDF emphasizes informative words and improves classification accuracy." }, { "id": 25, "questionText": "Scenario: You apply Gaussian NB but features are not Gaussian. What is likely?", "options": [ "Model may underperform", "Features are transformed automatically", "Posterior probabilities are exact", "Accuracy improves" ], "correctAnswerIndex": 0, "explanation": "Gaussian NB assumes normal distribution; violations can reduce accuracy." }, { "id": 26, "questionText": "Which step avoids zero probability for unseen feature values in training?", "options": [ "Laplace smoothing", "Feature scaling", "Normalization only", "Random shuffling" ], "correctAnswerIndex": 0, "explanation": "Laplace smoothing adds a small constant to feature counts." }, { "id": 27, "questionText": "Scenario: Two classes have very different sample sizes. Which helps?", "options": [ "Setting all priors equal", "Random shuffling", "Using priors proportional to class frequencies", "Ignoring class sizes" ], "correctAnswerIndex": 2, "explanation": "Prior probabilities account for class imbalance in prediction." }, { "id": 28, "questionText": "Scenario: Features are correlated. What is the effect on Naive Bayes?", "options": [ "Posterior probabilities remain exact", "Independence assumption is violated, may reduce accuracy", "Model ignores correlation automatically", "Accuracy improves" ], "correctAnswerIndex": 1, "explanation": "Naive Bayes assumes independence; correlations can reduce prediction reliability." 
}, { "id": 29, "questionText": "Which probability is directly used to choose class label in Naive Bayes?", "options": [ "Likelihood only", "Evidence only", "Posterior probability", "Prior probability only" ], "correctAnswerIndex": 2, "explanation": "Class with highest posterior probability is chosen as prediction." }, { "id": 30, "questionText": "Scenario: Naive Bayes is applied to multi-class classification. How is prediction done?", "options": [ "Compute posterior for each class and select maximum", "Use only the first class", "Choose class randomly", "Average class probabilities" ], "correctAnswerIndex": 0, "explanation": "Posterior probabilities are computed for each class; the one with the highest is selected." }, { "id": 31, "questionText": "Scenario: In email spam detection, which feature representation works best with Multinomial NB?", "options": [ "Random numbers", "Raw characters", "Word count or TF-IDF vectors", "Binary features only" ], "correctAnswerIndex": 2, "explanation": "Multinomial NB handles count-based features like word frequencies effectively." }, { "id": 32, "questionText": "Which smoothing method prevents zero probability in Naive Bayes?", "options": [ "Z-score normalization", "PCA", "Laplace smoothing", "Min-max scaling" ], "correctAnswerIndex": 2, "explanation": "Laplace smoothing adds a small value to feature counts, avoiding zero probability for unseen features." }, { "id": 33, "questionText": "Scenario: You have continuous features with non-Gaussian distribution. Which strategy is suitable?", "options": [ "Use Bernoulli NB", "Discretize features or use kernel density estimation", "Ignore feature distribution", "Use Gaussian NB without changes" ], "correctAnswerIndex": 1, "explanation": "Discretization or kernel density estimation allows NB to handle non-Gaussian continuous data." }, { "id": 34, "questionText": "Which assumption does Multinomial Naive Bayes make about features?", "options": [ "All features are binary", "Features are correlated", "Features represent counts/frequencies and are independent", "Features are continuous" ], "correctAnswerIndex": 2, "explanation": "Multinomial NB assumes independent counts/frequencies for each feature per class." }, { "id": 35, "questionText": "Scenario: You apply Naive Bayes to a dataset with missing categorical features. What is an effective approach?", "options": [ "Use Gaussian NB", "Replace with random values", "Ignore missing data", "Treat missing values as a separate category" ], "correctAnswerIndex": 3, "explanation": "Treating missing data as a separate category allows NB to include them in probability computation." }, { "id": 36, "questionText": "Scenario: You apply Laplace smoothing with alpha=1. What does alpha control?", "options": [ "Amount added to feature counts to avoid zero probability", "Learning rate", "Number of neighbors", "Feature scaling factor" ], "correctAnswerIndex": 0, "explanation": "Alpha determines the additive smoothing applied to counts to handle unseen feature values." }, { "id": 37, "questionText": "Scenario: Two features are highly correlated. How does Naive Bayes handle this?", "options": [ "Weights one feature higher", "Automatically removes one feature", "Merges features into one", "Assumes independence; predictions may be biased" ], "correctAnswerIndex": 3, "explanation": "Naive Bayes ignores correlation, which may reduce accuracy in such cases." 
}, { "id": 38, "questionText": "Scenario: Using Naive Bayes for sentiment analysis, what preprocessing step helps?", "options": [ "Tokenization, stop-word removal, and stemming", "Shuffling words randomly", "Ignoring word frequencies", "Using raw text only" ], "correctAnswerIndex": 0, "explanation": "Text preprocessing ensures features are meaningful and reduces noise." }, { "id": 39, "questionText": "Scenario: A new category appears in testing data unseen in training. What happens?", "options": [ "Class is automatically ignored", "Prediction remains correct", "Posterior probability becomes zero unless smoothed", "Naive Bayes creates a new class" ], "correctAnswerIndex": 2, "explanation": "Without smoothing, unseen feature categories lead to zero probability and failed predictions." }, { "id": 40, "questionText": "Scenario: Features are categorical with many levels. What helps Naive Bayes performance?", "options": [ "Merging all categories", "Feature encoding and smoothing", "Ignoring levels", "Using Gaussian NB" ], "correctAnswerIndex": 1, "explanation": "Encoding categorical features and smoothing probability estimates improves performance." }, { "id": 41, "questionText": "Scenario: Naive Bayes applied to multi-class document classification. How is probability computed?", "options": [ "Equal probability for all classes", "Posterior probability for each class using prior and likelihood", "Only consider the first class", "Random selection of class" ], "correctAnswerIndex": 1, "explanation": "Posterior is computed for each class and the highest is selected." }, { "id": 42, "questionText": "Scenario: You have imbalanced classes. How to adjust Naive Bayes?", "options": [ "Ignore imbalance", "Reduce feature counts", "Use class priors reflecting class frequencies", "Increase smoothing arbitrarily" ], "correctAnswerIndex": 2, "explanation": "Setting class priors helps account for imbalance in predictions." }, { "id": 43, "questionText": "Scenario: Naive Bayes is applied to continuous and categorical features together. Strategy?", "options": [ "Ignore categorical features", "Use only Multinomial NB", "Use Gaussian NB for continuous, Multinomial/Bernoulli NB for categorical", "Use only Gaussian NB" ], "correctAnswerIndex": 2, "explanation": "Different variants can be combined for mixed-type features." }, { "id": 44, "questionText": "Scenario: High-dimensional text data causes overfitting. What helps?", "options": [ "Feature selection or dimensionality reduction", "Ignore rare words", "Increase Laplace alpha", "Random shuffling of features" ], "correctAnswerIndex": 0, "explanation": "Selecting important features reduces overfitting and improves generalization." }, { "id": 45, "questionText": "Scenario: Two words always appear together in class A. Effect on Naive Bayes?", "options": [ "Posterior probabilities unaffected", "One word ignored", "Independence assumption violated; may affect accuracy", "Model handles correlation automatically" ], "correctAnswerIndex": 2, "explanation": "Correlated features violate independence, potentially reducing prediction reliability." }, { "id": 46, "questionText": "Scenario: Naive Bayes is slow with large vocabulary. What helps?", "options": [ "Increase alpha arbitrarily", "Use raw counts only", "Feature selection or TF-IDF weighting", "Shuffle training data" ], "correctAnswerIndex": 2, "explanation": "Reducing feature size or weighting reduces computation and improves performance." }, { "id": 47, "questionText": "Scenario: Text classification with short documents. 
Which variant works best?", "options": [ "Poisson NB", "Bernoulli NB with raw counts", "Multinomial NB with TF-IDF or word counts", "Gaussian NB" ], "correctAnswerIndex": 2, "explanation": "Short text benefits from count-based Multinomial NB representation." }, { "id": 48, "questionText": "Scenario: Feature appears in all classes equally. Effect?", "options": [ "Feature dominates prediction", "Feature does not help in discriminating classes", "Posterior probability increases", "Naive Bayes ignores automatically" ], "correctAnswerIndex": 1, "explanation": "Features with equal probability across classes do not contribute to classification." }, { "id": 49, "questionText": "Scenario: Multinomial NB predicts probabilities 0.7 for class A and 0.3 for class B. Decision?", "options": [ "Choose class B", "Average the classes", "Random selection", "Choose class A" ], "correctAnswerIndex": 3, "explanation": "Naive Bayes selects the class with the highest posterior probability." }, { "id": 50, "questionText": "Scenario: Features are sparse with many zeros. Which is preferred?", "options": [ "Use raw dense arrays only", "Gaussian NB", "Multinomial or Bernoulli NB with sparse representation", "Ignore zeros" ], "correctAnswerIndex": 2, "explanation": "Sparse-friendly NB variants handle high-dimensional sparse data efficiently." }, { "id": 51, "questionText": "Scenario: You want to explain predictions. Which Naive Bayes property helps?", "options": [ "Posterior is ignored", "Model is a black box", "Prior probabilities are hidden", "Feature contributions are interpretable via conditional probabilities" ], "correctAnswerIndex": 3, "explanation": "Conditional probabilities indicate which features most influence predictions." }, { "id": 52, "questionText": "Scenario: Naive Bayes used on reviews. Some rare words exist. Solution?", "options": [ "Normalize counts only", "Apply Laplace smoothing", "Ignore rare words", "Increase k arbitrarily" ], "correctAnswerIndex": 1, "explanation": "Smoothing ensures rare or unseen words do not result in zero probability." }, { "id": 53, "questionText": "Scenario: Features are normalized to 0-1. Effect on Multinomial NB?", "options": [ "Feature scaling automatically helps", "Posterior probabilities unaffected", "Accuracy improves", "Counts should remain integer; normalization may reduce effectiveness" ], "correctAnswerIndex": 3, "explanation": "Multinomial NB expects count data; normalization may distort probabilities." }, { "id": 54, "questionText": "Scenario: You have continuous features. Which transformation may help Gaussian NB?", "options": [ "Ignore continuous nature", "Binary encode features", "Log-transform to reduce skewness", "Shuffle values randomly" ], "correctAnswerIndex": 2, "explanation": "Transforming skewed data closer to Gaussian improves model fit." }, { "id": 55, "questionText": "Scenario: Two classes overlap heavily. Naive Bayes accuracy?", "options": [ "Model ignores overlap", "Increases automatically", "Reduced due to similar likelihoods", "Independent features help perfectly" ], "correctAnswerIndex": 2, "explanation": "When classes overlap, posterior probabilities may be close, leading to misclassification." }, { "id": 56, "questionText": "Scenario: You want to combine Gaussian and Multinomial features. Strategy?", "options": [ "Use a hybrid NB model handling each type separately", "Use Gaussian NB for all", "Ignore one type", "Convert all to counts" ], "correctAnswerIndex": 0, "explanation": "Hybrid NB allows handling mixed feature types properly." 
}, { "id": 57, "questionText": "Scenario: Some features are highly informative, others noisy. Strategy?", "options": [ "Keep all features", "Increase alpha", "Feature selection to keep informative features", "Randomly drop features" ], "correctAnswerIndex": 2, "explanation": "Selecting informative features improves classification and reduces noise influence." }, { "id": 58, "questionText": "Scenario: Words with high frequency in all classes. Effect?", "options": [ "Dominate prediction positively", "Provide little discrimination; may be removed", "Model ignores automatically", "Posterior probabilities increase" ], "correctAnswerIndex": 1, "explanation": "Common words like 'the' or 'and' do not help differentiate classes." }, { "id": 59, "questionText": "Scenario: Test data has unseen word features. What is required?", "options": [ "Gaussian NB handles automatically", "Remove prior probabilities", "Ignore unseen words", "Apply Laplace smoothing" ], "correctAnswerIndex": 3, "explanation": "Smoothing ensures unseen words do not produce zero posterior probability." }, { "id": 60, "questionText": "Scenario: You want probabilities instead of class labels. Naive Bayes output?", "options": [ "Only prior probability", "Only class label", "Posterior probability for each class", "Only likelihood" ], "correctAnswerIndex": 2, "explanation": "NB computes posterior probabilities, which can be used directly or thresholded for classification." }, { "id": 61, "questionText": "Scenario: Words co-occur frequently within a class. Effect?", "options": [ "Class probabilities unaffected", "NB ignores co-occurrence", "Independence assumption violated; may reduce accuracy", "Prediction improves automatically" ], "correctAnswerIndex": 2, "explanation": "Correlated features violate NB assumption; may bias predictions." }, { "id": 62, "questionText": "Scenario: Multiclass NB with 10 classes. How to predict?", "options": [ "Compute posterior for each class; choose maximum", "Average class probabilities", "Random class selection", "Use only first class" ], "correctAnswerIndex": 0, "explanation": "Posterior probabilities guide selection of most probable class." }, { "id": 63, "questionText": "Scenario: Some features have very low variance. Effect on Gaussian NB?", "options": [ "Model ignores feature automatically", "Posterior probability increases", "May have little impact; small variance reduces feature importance", "Feature dominates prediction" ], "correctAnswerIndex": 2, "explanation": "Low-variance features contribute less to posterior probability." }, { "id": 64, "questionText": "Scenario: Sparse categorical features with many unseen values. What helps?", "options": [ "Randomly shuffle features", "Ignore rare categories", "Smoothing and proper encoding", "Use Gaussian NB" ], "correctAnswerIndex": 2, "explanation": "Smoothing and encoding unseen categories allow proper posterior computation." }, { "id": 65, "questionText": "Scenario: Words occur in multiple classes with similar frequency. Effect?", "options": [ "Model ignores feature automatically", "Feature provides little discriminative power", "Posterior probabilities increase", "Feature dominates prediction" ], "correctAnswerIndex": 1, "explanation": "Non-informative features do not help classification." }, { "id": 66, "questionText": "Scenario: Features are scaled differently. 
Effect on Gaussian NB?", "options": [ "Feature scaling ignored", "NB unaffected", "Scaling impacts Gaussian NB since variance and mean are computed per feature", "Posterior remains exact" ], "correctAnswerIndex": 2, "explanation": "Scaling changes mean/variance; proper preprocessing ensures meaningful probabilities." }, { "id": 67, "questionText": "Scenario: Class conditional distributions overlap. Accuracy?", "options": [ "NB ignores overlap", "Reduced due to similar likelihoods", "Increases automatically", "Posterior probabilities exact" ], "correctAnswerIndex": 1, "explanation": "Overlap reduces discriminative power, increasing misclassification." }, { "id": 68, "questionText": "Scenario: Combining NB with feature selection. Effect?", "options": [ "Reduces accuracy", "Prior probabilities change", "Reduces noise and improves accuracy", "Ignored features dominate" ], "correctAnswerIndex": 2, "explanation": "Selecting important features improves model generalization." }, { "id": 69, "questionText": "Scenario: Naive Bayes for movie genre prediction. Some features missing. Strategy?", "options": [ "Gaussian NB only", "Ignore data row", "Randomly replace", "Treat missing as separate category or impute" ], "correctAnswerIndex": 3, "explanation": "Missing categorical features are handled as separate category or imputed to compute posterior." }, { "id": 70, "questionText": "Scenario: Rare feature appears in all classes equally. Impact?", "options": [ "Feature contributes little to classification", "Feature dominates prediction", "Posterior probability increases", "NB ignores automatically" ], "correctAnswerIndex": 0, "explanation": "Features with equal class frequency have minimal discriminative value." }, { "id": 71, "questionText": "Scenario: You have highly imbalanced classes. What is a good strategy with Naive Bayes?", "options": [ "Increase Laplace smoothing arbitrarily", "Use only majority class", "Ignore imbalance", "Adjust class priors according to class frequencies" ], "correctAnswerIndex": 3, "explanation": "Adjusting class priors ensures the model accounts for imbalance in predictions." }, { "id": 72, "questionText": "Scenario: Two features are strongly correlated. What is the effect on Naive Bayes?", "options": [ "NB automatically decorrelates features", "Independence assumption violated; may reduce accuracy", "Posterior remains exact", "Accuracy improves" ], "correctAnswerIndex": 1, "explanation": "Naive Bayes assumes independence. Correlated features may bias predictions." }, { "id": 73, "questionText": "Scenario: You are predicting rare disease presence. Most patients are healthy. Which is critical?", "options": [ "Class priors and threshold adjustment", "Use Gaussian NB for all", "Ignore rare class", "Increase feature counts" ], "correctAnswerIndex": 0, "explanation": "Rare class predictions require careful handling of priors and decision thresholds." }, { "id": 74, "questionText": "Scenario: Multi-class text classification with many rare words. Strategy?", "options": [ "Ignore rare words", "Use Laplace smoothing and possibly TF-IDF", "Use Gaussian NB", "Shuffle features" ], "correctAnswerIndex": 1, "explanation": "Smoothing and weighting rare words prevents zero probabilities and improves generalization." }, { "id": 75, "questionText": "Scenario: Continuous features are skewed. 
What improves Gaussian NB?", "options": [ "Use Bernoulli NB instead", "Ignore skewness", "Log or Box-Cox transformation to approximate Gaussian distribution", "Normalize 0–1" ], "correctAnswerIndex": 2, "explanation": "Transforming skewed features closer to Gaussian improves model assumptions and accuracy." }, { "id": 76, "questionText": "Scenario: Text classification. Some words appear in every class equally. Effect?", "options": [ "Dominates predictions", "NB ignores automatically", "Little discriminative value; may be removed", "Posterior increases" ], "correctAnswerIndex": 2, "explanation": "Features with equal class frequency do not help differentiate classes." }, { "id": 77, "questionText": "Scenario: Combining continuous and categorical features in one dataset. Strategy?", "options": [ "Use hybrid NB (Gaussian for continuous, Multinomial/Bernoulli for categorical)", "Convert all to counts", "Ignore one type", "Use Gaussian NB only" ], "correctAnswerIndex": 0, "explanation": "Hybrid NB allows proper handling of mixed feature types." }, { "id": 78, "questionText": "Scenario: Naive Bayes applied on streaming data with changing distributions. Strategy?", "options": [ "Use Gaussian NB only", "Ignore distribution change", "Randomly drop old data", "Retrain periodically or use incremental NB" ], "correctAnswerIndex": 3, "explanation": "Incremental learning or periodic retraining adapts to distribution shifts in streaming data." }, { "id": 79, "questionText": "Scenario: High-dimensional sparse data. What optimization helps?", "options": [ "Sparse representation and feature selection", "Shuffle features", "Increase Laplace alpha only", "Use raw dense matrix" ], "correctAnswerIndex": 0, "explanation": "Sparse storage and feature selection reduce computation and memory use." }, { "id": 80, "questionText": "Scenario: Words co-occur frequently within a class. Effect?", "options": [ "Violates independence; may bias predictions", "Improves accuracy automatically", "NB ignores co-occurrence", "Posterior unchanged" ], "correctAnswerIndex": 0, "explanation": "Correlated features violate NB assumption; predictions may be biased." }, { "id": 81, "questionText": "Scenario: Large vocabulary with many zero-count features. How to handle?", "options": [ "Use Laplace smoothing", "Remove zeros arbitrarily", "Use Gaussian NB", "Ignore rare features" ], "correctAnswerIndex": 0, "explanation": "Smoothing ensures zero-count features do not yield zero probability." }, { "id": 82, "questionText": "Scenario: Test data has unseen feature categories. Solution?", "options": [ "Apply Laplace smoothing or treat as unknown category", "Randomly assign values", "Use Gaussian NB", "Ignore unseen categories" ], "correctAnswerIndex": 0, "explanation": "Smoothing allows unseen categories to be incorporated safely." }, { "id": 83, "questionText": "Scenario: Overlapping class distributions. Naive Bayes accuracy?", "options": [ "NB ignores overlap", "Posterior exact", "Improves automatically", "Reduced due to similar likelihoods" ], "correctAnswerIndex": 3, "explanation": "Overlap reduces discriminative power, increasing misclassification risk." }, { "id": 84, "questionText": "Scenario: NB output shows posterior probabilities 0.51 vs 0.49. 
Interpretation?", "options": [ "Prediction is exact", "Model is uncertain; threshold adjustment may help", "Ignore probabilities", "Choose lower class" ], "correctAnswerIndex": 1, "explanation": "Close probabilities indicate uncertainty; thresholds or confidence measures can improve decision-making." }, { "id": 85, "questionText": "Scenario: Gaussian NB feature has extremely low variance. Effect?", "options": [ "Posterior increases", "Feature dominates prediction", "Ignored automatically", "Feature contributes little; may be ignored" ], "correctAnswerIndex": 3, "explanation": "Low-variance features have minimal impact on posterior probability." }, { "id": 86, "questionText": "Scenario: Multi-class NB with 15 classes. Prediction method?", "options": [ "Average probabilities", "Use only first class", "Compute posterior for each class and choose maximum", "Choose randomly" ], "correctAnswerIndex": 2, "explanation": "The class with the highest posterior probability is selected." }, { "id": 87, "questionText": "Scenario: NB applied on mixed numeric and categorical features. Preprocessing?", "options": [ "Ignore one feature type", "Normalize all", "Gaussian for numeric, Multinomial/Bernoulli for categorical", "Convert numeric to binary" ], "correctAnswerIndex": 2, "explanation": "Proper variant selection ensures correct probability calculation." }, { "id": 88, "questionText": "Scenario: Rare features appear in training but not testing. How to handle?", "options": [ "Apply smoothing to prevent zero probability", "Randomly assign probabilities", "Ignore rare features", "Use Gaussian NB" ], "correctAnswerIndex": 0, "explanation": "Smoothing prevents zero posterior for rare or unseen features." }, { "id": 89, "questionText": "Scenario: Continuous features heavily skewed. Best approach?", "options": [ "Convert to binary", "Use only categorical NB", "Log-transform to approximate Gaussian", "Ignore skewness" ], "correctAnswerIndex": 2, "explanation": "Transforming skewed continuous features improves Gaussian NB assumptions." }, { "id": 90, "questionText": "Scenario: Text classification with highly frequent words like 'the'. What should you do?", "options": [ "Increase Laplace alpha", "Remove stop words", "Keep all words", "Randomly shuffle" ], "correctAnswerIndex": 1, "explanation": "Stop-word removal prevents common non-informative words from dominating probabilities." }, { "id": 91, "questionText": "Scenario: NB used on streaming data with evolving distribution. What helps?", "options": [ "Ignore drift", "Use Gaussian NB only", "Incremental NB or periodic retraining", "Discard old data" ], "correctAnswerIndex": 2, "explanation": "Incremental learning adapts the model to changing feature distributions." }, { "id": 92, "questionText": "Scenario: Words appear together in many documents (correlation). Effect?", "options": [ "NB ignores correlation", "Posterior unaffected", "Violates independence; may reduce accuracy", "Improves accuracy" ], "correctAnswerIndex": 2, "explanation": "Correlated features violate the conditional independence assumption." }, { "id": 93, "questionText": "Scenario: Multi-class NB. One class has very few examples. Strategy?", "options": [ "Use priors and smoothing to handle small classes", "Duplicate small class", "Ignore small class", "Remove features" ], "correctAnswerIndex": 0, "explanation": "Small classes require careful handling of priors and smoothing to avoid misclassification." }, { "id": 94, "questionText": "Scenario: Mixed sparse and dense features. 
Optimization?", "options": [ "Use sparse representation for sparse features", "Convert all to dense", "Ignore sparse features", "Use only Gaussian NB" ], "correctAnswerIndex": 0, "explanation": "Sparse storage reduces memory and computation costs." }, { "id": 95, "questionText": "Scenario: NB misclassifies some classes consistently. Probable cause?", "options": [ "Posterior probabilities are exact", "Model ignores priors", "Independence assumption violated or poor feature selection", "Smoothing too high" ], "correctAnswerIndex": 2, "explanation": "Feature correlation or irrelevant features can bias predictions." }, { "id": 96, "questionText": "Scenario: Gaussian NB on features with large range differences. What is required?", "options": [ "Keep raw values", "Apply Laplace smoothing", "Random shuffling", "Standardize or normalize features" ], "correctAnswerIndex": 3, "explanation": "Feature scaling ensures Gaussian parameters are meaningful." }, { "id": 97, "questionText": "Scenario: NB for sentiment analysis with short documents. Strategy?", "options": [ "Poisson NB", "Use Multinomial NB with TF-IDF or counts", "Use Gaussian NB", "Bernoulli NB with raw counts" ], "correctAnswerIndex": 1, "explanation": "Short text benefits from count-based representation." }, { "id": 98, "questionText": "Scenario: Feature occurs frequently in all classes. Effect?", "options": [ "NB ignores automatically", "Posterior probability increases", "Provides little discriminative power; may be removed", "Dominates prediction" ], "correctAnswerIndex": 2, "explanation": "Non-informative features do not help classification." }, { "id": 99, "questionText": "Scenario: NB applied on multi-lingual text. Strategy?", "options": [ "Ignore language differences", "Merge all text blindly", "Separate feature sets per language or use language-independent features", "Use Gaussian NB" ], "correctAnswerIndex": 2, "explanation": "Language-specific preprocessing ensures meaningful feature extraction." }, { "id": 100, "questionText": "Scenario: You want to explain which features influenced prediction. Which NB property helps?", "options": [ "Only prior matters", "Conditional probabilities show feature contributions", "Posterior probabilities ignored", "Model is black-box" ], "correctAnswerIndex": 1, "explanation": "Conditional probabilities indicate how each feature contributes to the posterior probability." } ] }