{
"title": "Naive Bayes Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions to test and deepen your understanding of Naive Bayes classifiers, from fundamental concepts to advanced real-world applications and challenges.",
"questions": [
{
"id": 1,
"questionText": "What is the core assumption of Naive Bayes?",
"options": [
"All classes have equal probability",
"The dataset is balanced",
"Features are correlated",
"Features are independent given the class label"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes assumes conditional independence of features given the class, which simplifies probability computation."
},
{
"id": 2,
"questionText": "Which theorem is Naive Bayes based on?",
"options": [
"Markov Theorem",
"Pythagoras Theorem",
"Central Limit Theorem",
"Bayes' Theorem"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes uses Bayes’ Theorem to compute posterior probabilities for classification."
},
{
"id": 3,
"questionText": "In Naive Bayes, what is the 'prior probability'?",
"options": [
"Probability of each class before observing features",
"Probability of features given the class",
"Probability of misclassification",
"Conditional probability of test data"
],
"correctAnswerIndex": 0,
"explanation": "The prior is the initial probability of each class based on the training dataset."
},
{
"id": 4,
"questionText": "Which type of Naive Bayes is suitable for text data?",
"options": [
"Gaussian Naive Bayes",
"Bernoulli Naive Bayes",
"Multinomial Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB works well for text features, as it handles word frequencies."
},
{
"id": 5,
"questionText": "Which Naive Bayes variant is used for binary features?",
"options": [
"Gaussian Naive Bayes",
"Bernoulli Naive Bayes",
"Poisson Naive Bayes",
"Multinomial Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Bernoulli NB models binary presence/absence features effectively."
},
{
"id": 6,
"questionText": "In Gaussian Naive Bayes, features are assumed to follow which distribution?",
"options": [
"Uniform distribution",
"Normal (Gaussian) distribution",
"Exponential distribution",
"Poisson distribution"
],
"correctAnswerIndex": 1,
"explanation": "Gaussian NB models continuous features using a normal distribution."
},
{
"id": 7,
"questionText": "What is 'likelihood' in Naive Bayes?",
"options": [
"Probability of features given the class",
"Posterior probability",
"Prior probability",
"Probability of the class given features"
],
"correctAnswerIndex": 0,
"explanation": "Likelihood is P(features|class) used in Bayes’ formula to compute posterior probability."
},
{
"id": 8,
"questionText": "Which probability does Naive Bayes calculate to make predictions?",
"options": [
"Prior probability only",
"Posterior probability P(class|features)",
"Feature probability only",
"Joint probability of all classes"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes computes the posterior probability for each class and selects the class with the highest value."
},
{
"id": 9,
"questionText": "Why is it called 'Naive' Bayes?",
"options": [
"Because it is simple to implement",
"Because it assumes feature independence",
"Because it only works on small datasets",
"Because it ignores class labels"
],
"correctAnswerIndex": 1,
"explanation": "The method is 'naive' due to its strong assumption that features are independent given the class."
},
{
"id": 10,
"questionText": "Which metric is commonly used to evaluate Naive Bayes classifiers?",
"options": [
"R-squared value",
"Euclidean distance",
"Accuracy, Precision, Recall, F1-score",
"Mean squared error"
],
"correctAnswerIndex": 2,
"explanation": "Classification metrics like accuracy, precision, recall, and F1-score are used to evaluate Naive Bayes performance."
},
{
"id": 11,
"questionText": "Scenario: You have continuous features with Gaussian distribution. Which Naive Bayes variant is suitable?",
"options": [
"Bernoulli Naive Bayes",
"Gaussian Naive Bayes",
"Multinomial Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Gaussian NB models continuous features using the mean and variance of each class."
},
{
"id": 12,
"questionText": "Scenario: Your dataset has counts of words per document. Which Naive Bayes is ideal?",
"options": [
"Bernoulli Naive Bayes",
"Multinomial Naive Bayes",
"Gaussian Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 1,
"explanation": "Multinomial NB handles discrete count data such as word frequencies."
},
{
"id": 13,
"questionText": "Scenario: You have binary features indicating presence or absence. Which Naive Bayes type should you use?",
"options": [
"Gaussian Naive Bayes",
"Multinomial Naive Bayes",
"Bernoulli Naive Bayes",
"Poisson Naive Bayes"
],
"correctAnswerIndex": 2,
"explanation": "Bernoulli NB is suitable for binary features."
},
{
"id": 14,
"questionText": "Which problem arises if a feature has zero probability in training data?",
"options": [
"Likelihood is unaffected",
"Accuracy increases",
"Prior probability changes",
"Posterior becomes zero, causing prediction failure"
],
"correctAnswerIndex": 3,
"explanation": "Zero probability leads to a posterior of zero. Laplace smoothing is used to avoid this."
},
{
"id": 15,
"questionText": "What is Laplace smoothing used for in Naive Bayes?",
"options": [
"To normalize features",
"To handle zero probabilities",
"To scale continuous features",
"To reduce dimensionality"
],
"correctAnswerIndex": 1,
"explanation": "Laplace smoothing adds a small value to feature counts to avoid zero probabilities."
},
{
"id": 16,
"questionText": "Scenario: You apply Naive Bayes to a spam detection problem. What is the target variable?",
"options": [
"Document length",
"Email class (spam or not spam)",
"Feature importance",
"Word frequency"
],
"correctAnswerIndex": 1,
"explanation": "The target variable is the class label to predict, e.g., spam or ham."
},
{
"id": 17,
"questionText": "Scenario: In text classification, why do we use log probabilities in Naive Bayes?",
"options": [
"To prevent underflow from multiplying many small probabilities",
"To ignore irrelevant words",
"To increase accuracy",
"To normalize features"
],
"correctAnswerIndex": 0,
"explanation": "Log probabilities convert multiplication into addition, avoiding numerical underflow."
},
{
"id": 18,
"questionText": "Which is a limitation of Naive Bayes?",
"options": [
"Cannot handle categorical data",
"Requires large datasets only",
"Does not use prior probabilities",
"Assumes feature independence which is often violated"
],
"correctAnswerIndex": 3,
"explanation": "The independence assumption may not hold, potentially reducing accuracy."
},
{
"id": 19,
"questionText": "Which scenario favors Naive Bayes despite its independence assumption?",
"options": [
"Complex regression tasks",
"Time-series prediction",
"Text classification",
"Image classification with correlated pixels"
],
"correctAnswerIndex": 2,
"explanation": "Naive Bayes performs surprisingly well for text classification even when features are not fully independent."
},
{
"id": 20,
"questionText": "Which term in Bayes’ theorem represents evidence?",
"options": [
"P(features|class)",
"P(class)",
"P(class|features)",
"P(features)"
],
"correctAnswerIndex": 3,
"explanation": "Evidence is P(features), used to normalize posterior probabilities."
},
{
"id": 21,
"questionText": "Scenario: You want to classify news articles. Which preprocessing step helps Naive Bayes?",
"options": [
"Ignoring word frequencies",
"Tokenization and stop-word removal",
"Adding irrelevant words",
"Random feature shuffling"
],
"correctAnswerIndex": 1,
"explanation": "Tokenization and stop-word removal reduce noise and improve feature quality."
},
{
"id": 22,
"questionText": "Scenario: You notice some features dominate predictions. What can help?",
"options": [
"Add Laplace smoothing",
"Increase k",
"Remove prior probabilities",
"Feature scaling or normalization"
],
"correctAnswerIndex": 3,
"explanation": "Scaling features ensures no single feature dominates posterior computation."
},
{
"id": 23,
"questionText": "Which is a benefit of Naive Bayes?",
"options": [
"Handles missing values automatically",
"Works only on balanced datasets",
"Fast to train and predict",
"Always accurate"
],
"correctAnswerIndex": 2,
"explanation": "Naive Bayes is computationally efficient and works well with large datasets."
},
{
"id": 24,
"questionText": "Scenario: Multinomial Naive Bayes is applied to short text documents. What could help?",
"options": [
"TF-IDF feature weighting",
"Use raw counts only",
"Ignore feature scaling",
"Shuffle classes randomly"
],
"correctAnswerIndex": 0,
"explanation": "TF-IDF emphasizes informative words and improves classification accuracy."
},
{
"id": 25,
"questionText": "Scenario: You apply Gaussian NB but features are not Gaussian. What is likely?",
"options": [
"Model may underperform",
"Features are transformed automatically",
"Posterior probabilities are exact",
"Accuracy improves"
],
"correctAnswerIndex": 0,
"explanation": "Gaussian NB assumes normal distribution; violations can reduce accuracy."
},
{
"id": 26,
"questionText": "Which step avoids zero probability for unseen feature values in training?",
"options": [
"Laplace smoothing",
"Feature scaling",
"Normalization only",
"Random shuffling"
],
"correctAnswerIndex": 0,
"explanation": "Laplace smoothing adds a small constant to feature counts."
},
{
"id": 27,
"questionText": "Scenario: Two classes have very different sample sizes. Which helps?",
"options": [
"Setting all priors equal",
"Random shuffling",
"Using priors proportional to class frequencies",
"Ignoring class sizes"
],
"correctAnswerIndex": 2,
"explanation": "Prior probabilities account for class imbalance in prediction."
},
{
"id": 28,
"questionText": "Scenario: Features are correlated. What is the effect on Naive Bayes?",
"options": [
"Posterior probabilities remain exact",
"Independence assumption is violated, may reduce accuracy",
"Model ignores correlation automatically",
"Accuracy improves"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes assumes independence; correlations can reduce prediction reliability."
},
{
"id": 29,
"questionText": "Which probability is directly used to choose class label in Naive Bayes?",
"options": [
"Likelihood only",
"Evidence only",
"Posterior probability",
"Prior probability only"
],
"correctAnswerIndex": 2,
"explanation": "Class with highest posterior probability is chosen as prediction."
},
{
"id": 30,
"questionText": "Scenario: Naive Bayes is applied to multi-class classification. How is prediction done?",
"options": [
"Compute posterior for each class and select maximum",
"Use only the first class",
"Choose class randomly",
"Average class probabilities"
],
"correctAnswerIndex": 0,
"explanation": "Posterior probabilities are computed for each class; the one with the highest is selected."
},
{
"id": 31,
"questionText": "Scenario: In email spam detection, which feature representation works best with Multinomial NB?",
"options": [
"Random numbers",
"Raw characters",
"Word count or TF-IDF vectors",
"Binary features only"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB handles count-based features like word frequencies effectively."
},
{
"id": 32,
"questionText": "Which smoothing method prevents zero probability in Naive Bayes?",
"options": [
"Z-score normalization",
"PCA",
"Laplace smoothing",
"Min-max scaling"
],
"correctAnswerIndex": 2,
"explanation": "Laplace smoothing adds a small value to feature counts, avoiding zero probability for unseen features."
},
{
"id": 33,
"questionText": "Scenario: You have continuous features with non-Gaussian distribution. Which strategy is suitable?",
"options": [
"Use Bernoulli NB",
"Discretize features or use kernel density estimation",
"Ignore feature distribution",
"Use Gaussian NB without changes"
],
"correctAnswerIndex": 1,
"explanation": "Discretization or kernel density estimation allows NB to handle non-Gaussian continuous data."
},
{
"id": 34,
"questionText": "Which assumption does Multinomial Naive Bayes make about features?",
"options": [
"All features are binary",
"Features are correlated",
"Features represent counts/frequencies and are independent",
"Features are continuous"
],
"correctAnswerIndex": 2,
"explanation": "Multinomial NB assumes independent counts/frequencies for each feature per class."
},
{
"id": 35,
"questionText": "Scenario: You apply Naive Bayes to a dataset with missing categorical features. What is an effective approach?",
"options": [
"Use Gaussian NB",
"Replace with random values",
"Ignore missing data",
"Treat missing values as a separate category"
],
"correctAnswerIndex": 3,
"explanation": "Treating missing data as a separate category allows NB to include them in probability computation."
},
{
"id": 36,
"questionText": "Scenario: You apply Laplace smoothing with alpha=1. What does alpha control?",
"options": [
"Amount added to feature counts to avoid zero probability",
"Learning rate",
"Number of neighbors",
"Feature scaling factor"
],
"correctAnswerIndex": 0,
"explanation": "Alpha determines the additive smoothing applied to counts to handle unseen feature values."
},
{
"id": 37,
"questionText": "Scenario: Two features are highly correlated. How does Naive Bayes handle this?",
"options": [
"Weights one feature higher",
"Automatically removes one feature",
"Merges features into one",
"Assumes independence; predictions may be biased"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes ignores correlation, which may reduce accuracy in such cases."
},
{
"id": 38,
"questionText": "Scenario: Using Naive Bayes for sentiment analysis, what preprocessing step helps?",
"options": [
"Tokenization, stop-word removal, and stemming",
"Shuffling words randomly",
"Ignoring word frequencies",
"Using raw text only"
],
"correctAnswerIndex": 0,
"explanation": "Text preprocessing ensures features are meaningful and reduces noise."
},
{
"id": 39,
"questionText": "Scenario: A new category appears in testing data unseen in training. What happens?",
"options": [
"Class is automatically ignored",
"Prediction remains correct",
"Posterior probability becomes zero unless smoothed",
"Naive Bayes creates a new class"
],
"correctAnswerIndex": 2,
"explanation": "Without smoothing, unseen feature categories lead to zero probability and failed predictions."
},
{
"id": 40,
"questionText": "Scenario: Features are categorical with many levels. What helps Naive Bayes performance?",
"options": [
"Merging all categories",
"Feature encoding and smoothing",
"Ignoring levels",
"Using Gaussian NB"
],
"correctAnswerIndex": 1,
"explanation": "Encoding categorical features and smoothing probability estimates improves performance."
},
{
"id": 41,
"questionText": "Scenario: Naive Bayes applied to multi-class document classification. How is probability computed?",
"options": [
"Equal probability for all classes",
"Posterior probability for each class using prior and likelihood",
"Only consider the first class",
"Random selection of class"
],
"correctAnswerIndex": 1,
"explanation": "Posterior is computed for each class and the highest is selected."
},
{
"id": 42,
"questionText": "Scenario: You have imbalanced classes. How to adjust Naive Bayes?",
"options": [
"Ignore imbalance",
"Reduce feature counts",
"Use class priors reflecting class frequencies",
"Increase smoothing arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Setting class priors helps account for imbalance in predictions."
},
{
"id": 43,
"questionText": "Scenario: Naive Bayes is applied to continuous and categorical features together. Strategy?",
"options": [
"Ignore categorical features",
"Use only Multinomial NB",
"Use Gaussian NB for continuous, Multinomial/Bernoulli NB for categorical",
"Use only Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Different variants can be combined for mixed-type features."
},
{
"id": 44,
"questionText": "Scenario: High-dimensional text data causes overfitting. What helps?",
"options": [
"Feature selection or dimensionality reduction",
"Ignore rare words",
"Increase Laplace alpha",
"Random shuffling of features"
],
"correctAnswerIndex": 0,
"explanation": "Selecting important features reduces overfitting and improves generalization."
},
{
"id": 45,
"questionText": "Scenario: Two words always appear together in class A. Effect on Naive Bayes?",
"options": [
"Posterior probabilities unaffected",
"One word ignored",
"Independence assumption violated; may affect accuracy",
"Model handles correlation automatically"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate independence, potentially reducing prediction reliability."
},
{
"id": 46,
"questionText": "Scenario: Naive Bayes is slow with large vocabulary. What helps?",
"options": [
"Increase alpha arbitrarily",
"Use raw counts only",
"Feature selection or TF-IDF weighting",
"Shuffle training data"
],
"correctAnswerIndex": 2,
"explanation": "Reducing feature size or weighting reduces computation and improves performance."
},
{
"id": 47,
"questionText": "Scenario: Text classification with short documents. Which variant works best?",
"options": [
"Poisson NB",
"Bernoulli NB with raw counts",
"Multinomial NB with TF-IDF or word counts",
"Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Short text benefits from count-based Multinomial NB representation."
},
{
"id": 48,
"questionText": "Scenario: Feature appears in all classes equally. Effect?",
"options": [
"Feature dominates prediction",
"Feature does not help in discriminating classes",
"Posterior probability increases",
"Naive Bayes ignores automatically"
],
"correctAnswerIndex": 1,
"explanation": "Features with equal probability across classes do not contribute to classification."
},
{
"id": 49,
"questionText": "Scenario: Multinomial NB predicts probabilities 0.7 for class A and 0.3 for class B. Decision?",
"options": [
"Choose class B",
"Average the classes",
"Random selection",
"Choose class A"
],
"correctAnswerIndex": 3,
"explanation": "Naive Bayes selects the class with the highest posterior probability."
},
{
"id": 50,
"questionText": "Scenario: Features are sparse with many zeros. Which is preferred?",
"options": [
"Use raw dense arrays only",
"Gaussian NB",
"Multinomial or Bernoulli NB with sparse representation",
"Ignore zeros"
],
"correctAnswerIndex": 2,
"explanation": "Sparse-friendly NB variants handle high-dimensional sparse data efficiently."
},
{
"id": 51,
"questionText": "Scenario: You want to explain predictions. Which Naive Bayes property helps?",
"options": [
"Posterior is ignored",
"Model is a black box",
"Prior probabilities are hidden",
"Feature contributions are interpretable via conditional probabilities"
],
"correctAnswerIndex": 3,
"explanation": "Conditional probabilities indicate which features most influence predictions."
},
{
"id": 52,
"questionText": "Scenario: Naive Bayes used on reviews. Some rare words exist. Solution?",
"options": [
"Normalize counts only",
"Apply Laplace smoothing",
"Ignore rare words",
"Increase k arbitrarily"
],
"correctAnswerIndex": 1,
"explanation": "Smoothing ensures rare or unseen words do not result in zero probability."
},
{
"id": 53,
"questionText": "Scenario: Features are normalized to 0-1. Effect on Multinomial NB?",
"options": [
"Feature scaling automatically helps",
"Posterior probabilities unaffected",
"Accuracy improves",
"Counts should remain integer; normalization may reduce effectiveness"
],
"correctAnswerIndex": 3,
"explanation": "Multinomial NB expects count data; normalization may distort probabilities."
},
{
"id": 54,
"questionText": "Scenario: You have continuous features. Which transformation may help Gaussian NB?",
"options": [
"Ignore continuous nature",
"Binary encode features",
"Log-transform to reduce skewness",
"Shuffle values randomly"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed data closer to Gaussian improves model fit."
},
{
"id": 55,
"questionText": "Scenario: Two classes overlap heavily. Naive Bayes accuracy?",
"options": [
"Model ignores overlap",
"Increases automatically",
"Reduced due to similar likelihoods",
"Independent features help perfectly"
],
"correctAnswerIndex": 2,
"explanation": "When classes overlap, posterior probabilities may be close, leading to misclassification."
},
{
"id": 56,
"questionText": "Scenario: You want to combine Gaussian and Multinomial features. Strategy?",
"options": [
"Use a hybrid NB model handling each type separately",
"Use Gaussian NB for all",
"Ignore one type",
"Convert all to counts"
],
"correctAnswerIndex": 0,
"explanation": "Hybrid NB allows handling mixed feature types properly."
},
{
"id": 57,
"questionText": "Scenario: Some features are highly informative, others noisy. Strategy?",
"options": [
"Keep all features",
"Increase alpha",
"Feature selection to keep informative features",
"Randomly drop features"
],
"correctAnswerIndex": 2,
"explanation": "Selecting informative features improves classification and reduces noise influence."
},
{
"id": 58,
"questionText": "Scenario: Words with high frequency in all classes. Effect?",
"options": [
"Dominate prediction positively",
"Provide little discrimination; may be removed",
"Model ignores automatically",
"Posterior probabilities increase"
],
"correctAnswerIndex": 1,
"explanation": "Common words like 'the' or 'and' do not help differentiate classes."
},
{
"id": 59,
"questionText": "Scenario: Test data has unseen word features. What is required?",
"options": [
"Gaussian NB handles automatically",
"Remove prior probabilities",
"Ignore unseen words",
"Apply Laplace smoothing"
],
"correctAnswerIndex": 3,
"explanation": "Smoothing ensures unseen words do not produce zero posterior probability."
},
{
"id": 60,
"questionText": "Scenario: You want probabilities instead of class labels. Naive Bayes output?",
"options": [
"Only prior probability",
"Only class label",
"Posterior probability for each class",
"Only likelihood"
],
"correctAnswerIndex": 2,
"explanation": "NB computes posterior probabilities, which can be used directly or thresholded for classification."
},
{
"id": 61,
"questionText": "Scenario: Words co-occur frequently within a class. Effect?",
"options": [
"Class probabilities unaffected",
"NB ignores co-occurrence",
"Independence assumption violated; may reduce accuracy",
"Prediction improves automatically"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate NB assumption; may bias predictions."
},
{
"id": 62,
"questionText": "Scenario: Multiclass NB with 10 classes. How to predict?",
"options": [
"Compute posterior for each class; choose maximum",
"Average class probabilities",
"Random class selection",
"Use only first class"
],
"correctAnswerIndex": 0,
"explanation": "Posterior probabilities guide selection of most probable class."
},
{
"id": 63,
"questionText": "Scenario: Some features have very low variance. Effect on Gaussian NB?",
"options": [
"Model ignores feature automatically",
"Posterior probability increases",
"May have little impact; small variance reduces feature importance",
"Feature dominates prediction"
],
"correctAnswerIndex": 2,
"explanation": "Low-variance features contribute less to posterior probability."
},
{
"id": 64,
"questionText": "Scenario: Sparse categorical features with many unseen values. What helps?",
"options": [
"Randomly shuffle features",
"Ignore rare categories",
"Smoothing and proper encoding",
"Use Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Smoothing and encoding unseen categories allow proper posterior computation."
},
{
"id": 65,
"questionText": "Scenario: Words occur in multiple classes with similar frequency. Effect?",
"options": [
"Model ignores feature automatically",
"Feature provides little discriminative power",
"Posterior probabilities increase",
"Feature dominates prediction"
],
"correctAnswerIndex": 1,
"explanation": "Non-informative features do not help classification."
},
{
"id": 66,
"questionText": "Scenario: Features are scaled differently. Effect on Gaussian NB?",
"options": [
"Feature scaling ignored",
"NB unaffected",
"Scaling impacts Gaussian NB since variance and mean are computed per feature",
"Posterior remains exact"
],
"correctAnswerIndex": 2,
"explanation": "Scaling changes mean/variance; proper preprocessing ensures meaningful probabilities."
},
{
"id": 67,
"questionText": "Scenario: Class conditional distributions overlap. Accuracy?",
"options": [
"NB ignores overlap",
"Reduced due to similar likelihoods",
"Increases automatically",
"Posterior probabilities exact"
],
"correctAnswerIndex": 1,
"explanation": "Overlap reduces discriminative power, increasing misclassification."
},
{
"id": 68,
"questionText": "Scenario: Combining NB with feature selection. Effect?",
"options": [
"Reduces accuracy",
"Prior probabilities change",
"Reduces noise and improves accuracy",
"Ignored features dominate"
],
"correctAnswerIndex": 2,
"explanation": "Selecting important features improves model generalization."
},
{
"id": 69,
"questionText": "Scenario: Naive Bayes for movie genre prediction. Some features missing. Strategy?",
"options": [
"Gaussian NB only",
"Ignore data row",
"Randomly replace",
"Treat missing as separate category or impute"
],
"correctAnswerIndex": 3,
"explanation": "Missing categorical features are handled as separate category or imputed to compute posterior."
},
{
"id": 70,
"questionText": "Scenario: Rare feature appears in all classes equally. Impact?",
"options": [
"Feature contributes little to classification",
"Feature dominates prediction",
"Posterior probability increases",
"NB ignores automatically"
],
"correctAnswerIndex": 0,
"explanation": "Features with equal class frequency have minimal discriminative value."
},
{
"id": 71,
"questionText": "Scenario: You have highly imbalanced classes. What is a good strategy with Naive Bayes?",
"options": [
"Increase Laplace smoothing arbitrarily",
"Use only majority class",
"Ignore imbalance",
"Adjust class priors according to class frequencies"
],
"correctAnswerIndex": 3,
"explanation": "Adjusting class priors ensures the model accounts for imbalance in predictions."
},
{
"id": 72,
"questionText": "Scenario: Two features are strongly correlated. What is the effect on Naive Bayes?",
"options": [
"NB automatically decorrelates features",
"Independence assumption violated; may reduce accuracy",
"Posterior remains exact",
"Accuracy improves"
],
"correctAnswerIndex": 1,
"explanation": "Naive Bayes assumes independence. Correlated features may bias predictions."
},
{
"id": 73,
"questionText": "Scenario: You are predicting rare disease presence. Most patients are healthy. Which is critical?",
"options": [
"Class priors and threshold adjustment",
"Use Gaussian NB for all",
"Ignore rare class",
"Increase feature counts"
],
"correctAnswerIndex": 0,
"explanation": "Rare class predictions require careful handling of priors and decision thresholds."
},
{
"id": 74,
"questionText": "Scenario: Multi-class text classification with many rare words. Strategy?",
"options": [
"Ignore rare words",
"Use Laplace smoothing and possibly TF-IDF",
"Use Gaussian NB",
"Shuffle features"
],
"correctAnswerIndex": 1,
"explanation": "Smoothing and weighting rare words prevents zero probabilities and improves generalization."
},
{
"id": 75,
"questionText": "Scenario: Continuous features are skewed. What improves Gaussian NB?",
"options": [
"Use Bernoulli NB instead",
"Ignore skewness",
"Log or Box-Cox transformation to approximate Gaussian distribution",
"Normalize 0–1"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed features closer to Gaussian improves model assumptions and accuracy."
},
{
"id": 76,
"questionText": "Scenario: Text classification. Some words appear in every class equally. Effect?",
"options": [
"Dominates predictions",
"NB ignores automatically",
"Little discriminative value; may be removed",
"Posterior increases"
],
"correctAnswerIndex": 2,
"explanation": "Features with equal class frequency do not help differentiate classes."
},
{
"id": 77,
"questionText": "Scenario: Combining continuous and categorical features in one dataset. Strategy?",
"options": [
"Use hybrid NB (Gaussian for continuous, Multinomial/Bernoulli for categorical)",
"Convert all to counts",
"Ignore one type",
"Use Gaussian NB only"
],
"correctAnswerIndex": 0,
"explanation": "Hybrid NB allows proper handling of mixed feature types."
},
{
"id": 78,
"questionText": "Scenario: Naive Bayes applied on streaming data with changing distributions. Strategy?",
"options": [
"Use Gaussian NB only",
"Ignore distribution change",
"Randomly drop old data",
"Retrain periodically or use incremental NB"
],
"correctAnswerIndex": 3,
"explanation": "Incremental learning or periodic retraining adapts to distribution shifts in streaming data."
},
{
"id": 79,
"questionText": "Scenario: High-dimensional sparse data. What optimization helps?",
"options": [
"Sparse representation and feature selection",
"Shuffle features",
"Increase Laplace alpha only",
"Use raw dense matrix"
],
"correctAnswerIndex": 0,
"explanation": "Sparse storage and feature selection reduce computation and memory use."
},
{
"id": 80,
"questionText": "Scenario: Words co-occur frequently within a class. Effect?",
"options": [
"Violates independence; may bias predictions",
"Improves accuracy automatically",
"NB ignores co-occurrence",
"Posterior unchanged"
],
"correctAnswerIndex": 0,
"explanation": "Correlated features violate NB assumption; predictions may be biased."
},
{
"id": 81,
"questionText": "Scenario: Large vocabulary with many zero-count features. How to handle?",
"options": [
"Use Laplace smoothing",
"Remove zeros arbitrarily",
"Use Gaussian NB",
"Ignore rare features"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing ensures zero-count features do not yield zero probability."
},
{
"id": 82,
"questionText": "Scenario: Test data has unseen feature categories. Solution?",
"options": [
"Apply Laplace smoothing or treat as unknown category",
"Randomly assign values",
"Use Gaussian NB",
"Ignore unseen categories"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing allows unseen categories to be incorporated safely."
},
{
"id": 83,
"questionText": "Scenario: Overlapping class distributions. Naive Bayes accuracy?",
"options": [
"NB ignores overlap",
"Posterior exact",
"Improves automatically",
"Reduced due to similar likelihoods"
],
"correctAnswerIndex": 3,
"explanation": "Overlap reduces discriminative power, increasing misclassification risk."
},
{
"id": 84,
"questionText": "Scenario: NB output shows posterior probabilities 0.51 vs 0.49. Interpretation?",
"options": [
"Prediction is exact",
"Model is uncertain; threshold adjustment may help",
"Ignore probabilities",
"Choose lower class"
],
"correctAnswerIndex": 1,
"explanation": "Close probabilities indicate uncertainty; thresholds or confidence measures can improve decision-making."
},
{
"id": 85,
"questionText": "Scenario: Gaussian NB feature has extremely low variance. Effect?",
"options": [
"Posterior increases",
"Feature dominates prediction",
"Ignored automatically",
"Feature contributes little; may be ignored"
],
"correctAnswerIndex": 3,
"explanation": "Low-variance features have minimal impact on posterior probability."
},
{
"id": 86,
"questionText": "Scenario: Multi-class NB with 15 classes. Prediction method?",
"options": [
"Average probabilities",
"Use only first class",
"Compute posterior for each class and choose maximum",
"Choose randomly"
],
"correctAnswerIndex": 2,
"explanation": "The class with the highest posterior probability is selected."
},
{
"id": 87,
"questionText": "Scenario: NB applied on mixed numeric and categorical features. Preprocessing?",
"options": [
"Ignore one feature type",
"Normalize all",
"Gaussian for numeric, Multinomial/Bernoulli for categorical",
"Convert numeric to binary"
],
"correctAnswerIndex": 2,
"explanation": "Proper variant selection ensures correct probability calculation."
},
{
"id": 88,
"questionText": "Scenario: Rare features appear in training but not testing. How to handle?",
"options": [
"Apply smoothing to prevent zero probability",
"Randomly assign probabilities",
"Ignore rare features",
"Use Gaussian NB"
],
"correctAnswerIndex": 0,
"explanation": "Smoothing prevents zero posterior for rare or unseen features."
},
{
"id": 89,
"questionText": "Scenario: Continuous features heavily skewed. Best approach?",
"options": [
"Convert to binary",
"Use only categorical NB",
"Log-transform to approximate Gaussian",
"Ignore skewness"
],
"correctAnswerIndex": 2,
"explanation": "Transforming skewed continuous features improves Gaussian NB assumptions."
},
{
"id": 90,
"questionText": "Scenario: Text classification with highly frequent words like 'the'. What should you do?",
"options": [
"Increase Laplace alpha",
"Remove stop words",
"Keep all words",
"Randomly shuffle"
],
"correctAnswerIndex": 1,
"explanation": "Stop-word removal prevents common non-informative words from dominating probabilities."
},
{
"id": 91,
"questionText": "Scenario: NB used on streaming data with evolving distribution. What helps?",
"options": [
"Ignore drift",
"Use Gaussian NB only",
"Incremental NB or periodic retraining",
"Discard old data"
],
"correctAnswerIndex": 2,
"explanation": "Incremental learning adapts the model to changing feature distributions."
},
{
"id": 92,
"questionText": "Scenario: Words appear together in many documents (correlation). Effect?",
"options": [
"NB ignores correlation",
"Posterior unaffected",
"Violates independence; may reduce accuracy",
"Improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Correlated features violate the conditional independence assumption."
},
{
"id": 93,
"questionText": "Scenario: Multi-class NB. One class has very few examples. Strategy?",
"options": [
"Use priors and smoothing to handle small classes",
"Duplicate small class",
"Ignore small class",
"Remove features"
],
"correctAnswerIndex": 0,
"explanation": "Small classes require careful handling of priors and smoothing to avoid misclassification."
},
{
"id": 94,
"questionText": "Scenario: Mixed sparse and dense features. Optimization?",
"options": [
"Use sparse representation for sparse features",
"Convert all to dense",
"Ignore sparse features",
"Use only Gaussian NB"
],
"correctAnswerIndex": 0,
"explanation": "Sparse storage reduces memory and computation costs."
},
{
"id": 95,
"questionText": "Scenario: NB misclassifies some classes consistently. Probable cause?",
"options": [
"Posterior probabilities are exact",
"Model ignores priors",
"Independence assumption violated or poor feature selection",
"Smoothing too high"
],
"correctAnswerIndex": 2,
"explanation": "Feature correlation or irrelevant features can bias predictions."
},
{
"id": 96,
"questionText": "Scenario: Gaussian NB on features with large range differences. What is required?",
"options": [
"Keep raw values",
"Apply Laplace smoothing",
"Random shuffling",
"Standardize or normalize features"
],
"correctAnswerIndex": 3,
"explanation": "Feature scaling ensures Gaussian parameters are meaningful."
},
{
"id": 97,
"questionText": "Scenario: NB for sentiment analysis with short documents. Strategy?",
"options": [
"Poisson NB",
"Use Multinomial NB with TF-IDF or counts",
"Use Gaussian NB",
"Bernoulli NB with raw counts"
],
"correctAnswerIndex": 1,
"explanation": "Short text benefits from count-based representation."
},
{
"id": 98,
"questionText": "Scenario: Feature occurs frequently in all classes. Effect?",
"options": [
"NB ignores automatically",
"Posterior probability increases",
"Provides little discriminative power; may be removed",
"Dominates prediction"
],
"correctAnswerIndex": 2,
"explanation": "Non-informative features do not help classification."
},
{
"id": 99,
"questionText": "Scenario: NB applied on multi-lingual text. Strategy?",
"options": [
"Ignore language differences",
"Merge all text blindly",
"Separate feature sets per language or use language-independent features",
"Use Gaussian NB"
],
"correctAnswerIndex": 2,
"explanation": "Language-specific preprocessing ensures meaningful feature extraction."
},
{
"id": 100,
"questionText": "Scenario: You want to explain which features influenced prediction. Which NB property helps?",
"options": [
"Only prior matters",
"Conditional probabilities show feature contributions",
"Posterior probabilities ignored",
"Model is black-box"
],
"correctAnswerIndex": 1,
"explanation": "Conditional probabilities indicate how each feature contributes to the posterior probability."
}
]
}