| { | |
| "title": "t-SNE Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions on t-Distributed Stochastic Neighbor Embedding (t-SNE), covering basic theory, medium-level conceptual understanding, and hard scenario-based applications.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What does t-SNE stand for?", | |
| "options": [ | |
| "t-Scaled Neighbor Estimation", | |
| "t-Distributed Stochastic Neighbor Embedding", | |
| "Tensor Stochastic Network Embedding", | |
| "Total Stochastic Neural Embedding" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE stands for t-Distributed Stochastic Neighbor Embedding, a technique for dimensionality reduction and visualization of high-dimensional data." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "What is the primary purpose of t-SNE?", | |
| "options": [ | |
| "Normalizing data features", | |
| "Generating synthetic data", | |
| "Reducing dimensionality for visualization", | |
| "Classifying high-dimensional data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE is primarily used to reduce high-dimensional data into 2D or 3D for visualization while preserving local structure." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "t-SNE is particularly good at preserving which type of data structure?", | |
| "options": [ | |
| "Linear relationships", | |
| "Global distances", | |
| "Local neighborhood structure", | |
| "Class labels" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE focuses on preserving local similarities, meaning points that are close in high-dimensional space remain close in low-dimensional space." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "What is the usual output dimension for t-SNE visualization?", | |
| "options": [ | |
| "1D", | |
| "10D", | |
| "2D or 3D", | |
| "5D" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE is typically used to reduce data to 2D or 3D for easy visualization." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "Which probability distribution is used in the low-dimensional space of t-SNE?", | |
| "options": [ | |
| "Student’s t-distribution", | |
| "Gaussian distribution", | |
| "Binomial distribution", | |
| "Uniform distribution" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE uses a Student’s t-distribution with one degree of freedom in the low-dimensional space to model pairwise similarities and prevent crowding." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "Which cost function does t-SNE minimize?", | |
| "options": [ | |
| "Cross-Entropy", | |
| "Euclidean distance", | |
| "Mean Squared Error", | |
| "Kullback-Leibler divergence" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE minimizes the Kullback-Leibler divergence between high-dimensional and low-dimensional probability distributions." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "In t-SNE, what is 'perplexity'?", | |
| "options": [ | |
| "Effective number of neighbors considered", | |
| "Learning rate for gradient descent", | |
| "A measure of dataset size", | |
| "Number of output dimensions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Perplexity controls how many neighbors influence the calculation of similarities; it acts like a smooth measure of the number of neighbors." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "t-SNE is best suited for which type of relationships?", | |
| "options": [ | |
| "Only categorical", | |
| "Linear", | |
| "Only continuous labels", | |
| "Non-linear" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE captures non-linear relationships that linear methods like PCA may miss." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Which step is part of the t-SNE algorithm?", | |
| "options": [ | |
| "Clustering data into fixed bins", | |
| "Computing pairwise similarities in high-dimensional space", | |
| "Normalizing labels only", | |
| "Sorting features alphabetically" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE first computes pairwise similarities between all points in the high-dimensional space." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "t-SNE initialization in low-dimensional space is usually:", | |
| "options": [ | |
| "Zero matrix", | |
| "Random", | |
| "Label-based ordering", | |
| "PCA projection" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE typically starts with random placement of points in the low-dimensional space." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "Which of these is a limitation of t-SNE?", | |
| "options": [ | |
| "Requires categorical labels", | |
| "Does not work on numeric data", | |
| "Does not scale well to very large datasets", | |
| "Cannot handle linear relationships" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE can be computationally expensive and memory-intensive for large datasets." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "Which t-SNE hyperparameter affects convergence speed?", | |
| "options": [ | |
| "Learning rate", | |
| "Perplexity", | |
| "KL divergence", | |
| "Number of features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The learning rate determines the step size in gradient descent optimization of the t-SNE cost function." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "t-SNE is mainly used for:", | |
| "options": [ | |
| "Prediction", | |
| "Clustering as a main algorithm", | |
| "Classification", | |
| "Dimensionality reduction for visualization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE reduces dimensionality to visualize complex high-dimensional data effectively." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "Which of these statements about t-SNE is correct?", | |
| "options": [ | |
| "It only works on 2D input", | |
| "It preserves global distances exactly", | |
| "It produces deterministic results", | |
| "It preserves local neighborhood structure" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE focuses on maintaining local structure; global distances may be distorted." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "t-SNE reduces crowding in low-dimensional space using:", | |
| "options": [ | |
| "Euclidean distance in high dimension only", | |
| "Gaussian kernel in high dimension, Student’s t-distribution in low dimension", | |
| "Uniform distance mapping", | |
| "PCA initialization only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Using t-distribution in low dimension with heavy tails helps spread out points to avoid crowding." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "t-SNE’s output can vary between runs due to:", | |
| "options": [ | |
| "Random initialization", | |
| "Gradient descent step size", | |
| "Data normalization", | |
| "Perplexity only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Random initialization in low-dimensional space can lead to different local minima in optimization." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which of these is true about t-SNE and PCA?", | |
| "options": [ | |
| "t-SNE is deterministic like PCA", | |
| "Both capture only linear structures", | |
| "PCA is better for visualization", | |
| "t-SNE captures non-linear structure; PCA is linear" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE can capture complex non-linear relationships, whereas PCA preserves only linear variance." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "t-SNE is not suitable for:", | |
| "options": [ | |
| "Non-linear data", | |
| "Small datasets", | |
| "Large-scale datasets without optimization", | |
| "2D visualization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE has high computational and memory cost for very large datasets." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "Which t-SNE hyperparameter influences the balance between local and global structure?", | |
| "options": [ | |
| "Learning rate", | |
| "Perplexity", | |
| "Output dimension", | |
| "KL divergence" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Perplexity acts as a smooth measure of the number of neighbors, balancing local vs. slightly broader structures." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "t-SNE is stochastic because:", | |
| "options": [ | |
| "It uses KL divergence", | |
| "It uses random initialization and gradient descent", | |
| "It uses linear mapping", | |
| "It uses PCA first" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "The combination of random initialization and stochastic optimization leads to variability in results." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "Which of these datasets is most appropriate for t-SNE?", | |
| "options": [ | |
| "Low-dimensional 2D data only", | |
| "Empty datasets", | |
| "Categorical data without encoding", | |
| "High-dimensional numeric data for visualization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE is designed to visualize high-dimensional data by projecting it to 2D or 3D." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "t-SNE helps in which task indirectly?", | |
| "options": [ | |
| "Label encoding", | |
| "Understanding clusters or patterns", | |
| "Making predictions", | |
| "Model regularization" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "While t-SNE does not perform clustering, it can help visually identify clusters or patterns." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "Why does t-SNE use Student’s t-distribution in low dimensions?", | |
| "options": [ | |
| "To handle the 'crowding problem' by allowing heavy tails", | |
| "To increase perplexity", | |
| "To linearize data", | |
| "To simplify computation" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Heavy-tailed t-distribution spreads out points in low-dimensional space, avoiding crowding." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "t-SNE is sensitive to which of the following?", | |
| "options": [ | |
| "Number of labels only", | |
| "Dataset size irrelevant", | |
| "Hyperparameters (perplexity, learning rate) and initialization", | |
| "Output dimension only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Small changes in parameters or random initialization can significantly affect t-SNE results." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "t-SNE preserves which type of distance?", | |
| "options": [ | |
| "Global Euclidean distance", | |
| "Local pairwise similarity", | |
| "Cosine distance", | |
| "Manhattan distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE preserves pairwise similarities among neighbors rather than absolute global distances." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "Which of these is a recommended practice before t-SNE?", | |
| "options": [ | |
| "Removing labels", | |
| "Standardizing or normalizing features", | |
| "Shuffling the dataset randomly", | |
| "Discretizing continuous features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Feature scaling ensures no single feature dominates pairwise distance calculations." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "t-SNE is mainly used in which field?", | |
| "options": [ | |
| "Optimization of hyperparameters", | |
| "Data visualization, exploratory data analysis", | |
| "Regression", | |
| "Prediction" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE helps visualize high-dimensional data in 2D or 3D for analysis and pattern detection." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "t-SNE is different from PCA because:", | |
| "options": [ | |
| "It is non-linear and focuses on local similarities", | |
| "It reduces to a single principal component", | |
| "It preserves global linear variance", | |
| "It always gives deterministic results" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Unlike PCA, t-SNE focuses on preserving local structure and can capture complex non-linear relationships." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Which of these can be used to accelerate t-SNE on large datasets?", | |
| "options": [ | |
| "Reduce iterations to 1", | |
| "Increase perplexity to maximum", | |
| "Use raw data without scaling", | |
| "Barnes-Hut approximation or FIt-SNE" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Barnes-Hut t-SNE and FIt-SNE optimize computation for larger datasets." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "t-SNE is primarily a ______ technique.", | |
| "options": [ | |
| "Clustering algorithm", | |
| "Regression", | |
| "Classification", | |
| "Visualization and dimensionality reduction" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE is mainly used to reduce dimensionality of data for visualization purposes." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "t-SNE uses which similarity measure in high-dimensional space?", | |
| "options": [ | |
| "Conditional probability based on Gaussian distribution", | |
| "Hamming distance", | |
| "Manhattan distance", | |
| "Cosine similarity" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE converts pairwise distances into conditional probabilities using a Gaussian distribution to represent similarity in high-dimensional space." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Scenario: You increase t-SNE perplexity from 5 to 50. Likely effect?", | |
| "options": [ | |
| "KL divergence becomes zero", | |
| "Clusters appear tighter and more separated", | |
| "Clusters merge, representing broader neighborhood", | |
| "Visualization fails" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Higher perplexity considers more neighbors, leading to a broader view of local structure and sometimes merging of clusters." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "t-SNE output varies between runs due to:", | |
| "options": [ | |
| "Variance scaling", | |
| "Perplexity normalization", | |
| "Gradient descent randomness and initialization", | |
| "Feature selection" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random initialization combined with stochastic gradient descent optimization can lead to different results in different runs." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Scenario: You apply t-SNE to 1000-dimensional word embeddings. Best practice?", | |
| "options": [ | |
| "Normalize only labels", | |
| "Discard half of the words randomly", | |
| "Optionally perform PCA first to reduce dimensions before t-SNE", | |
| "Apply t-SNE directly without scaling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using PCA first reduces noise and computation while retaining most variance, improving t-SNE performance on high-dimensional embeddings." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "t-SNE is sensitive to which hyperparameters?", | |
| "options": [ | |
| "Perplexity, learning rate, number of iterations", | |
| "Data type", | |
| "Number of output labels only", | |
| "PCA components only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Perplexity, learning rate, and iterations significantly influence the optimization and visualization outcome." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Scenario: t-SNE shows distorted global distances. Reason?", | |
| "options": [ | |
| "t-SNE focuses on preserving local structure, not global distances", | |
| "Data not normalized", | |
| "Algorithm failed", | |
| "Number of components is wrong" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE prioritizes local similarity preservation; global distances may be distorted in low-dimensional visualization." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "t-SNE uses which distribution in low-dimensional space to compute similarities?", | |
| "options": [ | |
| "Student’s t-distribution", | |
| "Poisson", | |
| "Uniform", | |
| "Gaussian" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A heavy-tailed Student’s t-distribution is used to avoid crowding in low-dimensional embeddings." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "Scenario: t-SNE applied to small dataset, clusters overlap in 2D. Possible reason?", | |
| "options": [ | |
| "Learning rate too small", | |
| "All of the above", | |
| "Data scaled incorrectly", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Perplexity, learning rate, and feature scaling all affect t-SNE output; poor tuning can cause cluster overlap." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "t-SNE reduces dimensionality mainly for:", | |
| "options": [ | |
| "Prediction accuracy", | |
| "Label generation", | |
| "Visualization of high-dimensional patterns", | |
| "Feature elimination" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE helps visualize complex high-dimensional data by reducing it to 2D or 3D while preserving local structure." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "Scenario: t-SNE shows similar points far apart. Likely cause?", | |
| "options": [ | |
| "Random initialization", | |
| "All of the above", | |
| "Insufficient iterations", | |
| "Improper perplexity or learning rate" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "All these factors can distort local relationships in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Scenario: High-dimensional clusters not visible after t-SNE. Solution?", | |
| "options": [ | |
| "Use PCA for pre-reduction", | |
| "Tune perplexity and learning rate", | |
| "All of the above", | |
| "Increase iterations" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Proper hyperparameter tuning, PCA pre-reduction, and enough iterations improve cluster separation." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "t-SNE optimization uses which method?", | |
| "options": [ | |
| "Gradient descent", | |
| "Random selection", | |
| "Eigen decomposition only", | |
| "Closed-form solution" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE minimizes KL divergence using iterative gradient descent." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "Scenario: You use t-SNE on image embeddings, clusters appear inconsistent. Recommendation?", | |
| "options": [ | |
| "Reduce iterations", | |
| "Use raw pixels without embeddings", | |
| "Change output dimension to 1D", | |
| "Repeat multiple runs and average or use PCA initialization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Due to randomness, multiple runs or PCA initialization can stabilize t-SNE visualization." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Scenario: t-SNE applied after PCA with 50 components. Benefit?", | |
| "options": [ | |
| "Generates labels", | |
| "Prevents convergence", | |
| "Distorts local structure", | |
| "Reduces noise and computation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "PCA pre-reduction helps t-SNE handle high-dimensional data efficiently while preserving structure." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "t-SNE is mainly affected by:", | |
| "options": [ | |
| "Number of labels", | |
| "Hyperparameters and data scaling", | |
| "Dataset name", | |
| "Feature type only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE results are sensitive to perplexity, learning rate, iterations, and proper feature scaling." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Scenario: t-SNE clusters different classes but distorts distances. Interpretation?", | |
| "options": [ | |
| "Data incorrect", | |
| "Local structure preserved; global distances may differ", | |
| "Output dimension wrong", | |
| "Algorithm failed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE emphasizes local neighbor relations, which can distort large-scale global distances." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "t-SNE is not ideal for:", | |
| "options": [ | |
| "Exploring patterns", | |
| "Small datasets", | |
| "Extremely large datasets without optimization", | |
| "Visualizing embeddings" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE has high computational cost for very large datasets, though optimized versions exist." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "Scenario: Learning rate too high in t-SNE. Effect?", | |
| "options": [ | |
| "All of the above", | |
| "Optimization diverges, poor visualization", | |
| "Slower convergence", | |
| "Better cluster separation" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Excessively high learning rate can prevent gradient descent from converging, causing chaotic mapping." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "Scenario: Low perplexity used on dense dataset. Effect?", | |
| "options": [ | |
| "Improves convergence", | |
| "All points overlap", | |
| "Merges clusters", | |
| "Overemphasizes very local structure, clusters may fragment" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Low perplexity focuses on few neighbors, possibly fragmenting clusters that are globally coherent." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Scenario: t-SNE applied to gene expression data for visualization. Useful because?", | |
| "options": [ | |
| "Generates labels", | |
| "Predicts outcomes", | |
| "Reduces features for training", | |
| "Highlights local patterns and clusters of similar samples" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE reveals underlying patterns in high-dimensional gene expression data." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "Scenario: After applying t-SNE, some clusters appear elongated. Likely cause?", | |
| "options": [ | |
| "Perplexity or learning rate not optimal", | |
| "Insufficient iterations", | |
| "Random initialization", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Cluster shape distortions can result from improper hyperparameters, initialization, or insufficient optimization steps." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Scenario: t-SNE shows overlapping clusters for distinct classes. Recommended action?", | |
| "options": [ | |
| "Reduce dataset size", | |
| "Increase output dimensions beyond 3", | |
| "Use raw data without scaling", | |
| "Adjust perplexity or learning rate, or try PCA initialization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hyperparameter tuning and proper initialization help better separate clusters in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "t-SNE can be combined with PCA to:", | |
| "options": [ | |
| "Replace t-SNE entirely", | |
| "Increase perplexity automatically", | |
| "Reduce noise and dimensionality before t-SNE", | |
| "Generate class labels" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using PCA first reduces high-dimensional noise, improving t-SNE efficiency and visualization quality." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "Scenario: t-SNE on image embeddings produces different plots on repeated runs. Reason?", | |
| "options": [ | |
| "Data scaling issues", | |
| "Random initialization and stochastic gradient descent", | |
| "Perplexity too low", | |
| "Output dimension is too small" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Variability is due to random initialization and stochastic optimization inherent to t-SNE." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Scenario: t-SNE applied to word embeddings shows tight clusters merging. Likely reason?", | |
| "options": [ | |
| "KL divergence minimized", | |
| "Learning rate too low", | |
| "Perplexity too high, considering more neighbors", | |
| "Insufficient iterations" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High perplexity broadens the neighborhood, causing close clusters to merge visually." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "Scenario: Large dataset t-SNE visualization is slow. Solution?", | |
| "options": [ | |
| "Reduce perplexity to 1", | |
| "Increase learning rate to max", | |
| "Use Barnes-Hut t-SNE or FIt-SNE approximation", | |
| "Use raw data without normalization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Optimized t-SNE versions like Barnes-Hut or FIt-SNE reduce computation and memory cost for large datasets." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Scenario: t-SNE on 100-dimensional embeddings, output 2D. Why might global distances be inaccurate?", | |
| "options": [ | |
| "Incorrect PCA initialization", | |
| "t-SNE prioritizes local neighborhood preservation over global distances", | |
| "Random features selected", | |
| "Algorithm failed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE focuses on preserving local similarities; global distances may be distorted in low-dimensional space." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Scenario: t-SNE applied on noisy dataset. Recommended preprocessing?", | |
| "options": [ | |
| "Normalize or standardize features, optionally reduce noise with PCA", | |
| "Reduce output dimension to 1D", | |
| "Leave data raw", | |
| "Increase perplexity to max" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Scaling and dimensionality reduction improve t-SNE’s ability to capture meaningful structure." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Scenario: Two similar clusters in high-dimensional space are far apart in t-SNE plot. Likely reason?", | |
| "options": [ | |
| "Data labeling issues", | |
| "Algorithm failure", | |
| "Insufficient perplexity or learning rate tuning", | |
| "Incorrect output dimension" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Improper hyperparameters can distort low-dimensional mapping even if local structure is partially preserved." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "t-SNE can indirectly help in which of these tasks?", | |
| "options": [ | |
| "Direct prediction", | |
| "Visual identification of clusters or patterns", | |
| "Label encoding", | |
| "Feature selection for regression" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "While not a clustering method itself, t-SNE helps visually identify clusters or patterns in high-dimensional data." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "Scenario: You want to visualize 10,000 points with t-SNE but it is slow. Best practice?", | |
| "options": [ | |
| "Increase perplexity to 1000", | |
| "Use optimized versions like FIt-SNE or reduce dimensionality with PCA first", | |
| "Randomly remove half the points", | |
| "Reduce iterations to 10" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Optimized algorithms or PCA pre-reduction improve t-SNE performance on large datasets." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Scenario: t-SNE applied on text embeddings shows random patterns. Likely cause?", | |
| "options": [ | |
| "Low-dimensional output", | |
| "Dataset too large", | |
| "Random initialization and inappropriate hyperparameters", | |
| "Data normalization applied" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random initialization combined with suboptimal perplexity or learning rate can produce unstable visualizations." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "t-SNE is particularly useful when:", | |
| "options": [ | |
| "High-dimensional data visualization is needed", | |
| "Regression is required", | |
| "Prediction is the goal", | |
| "Clustering as a main task" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE is designed for visualization of complex, high-dimensional datasets." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Scenario: t-SNE clusters appear overlapping even after PCA pre-reduction. Recommendation?", | |
| "options": [ | |
| "Reduce dataset size further", | |
| "Decrease output dimension to 1D", | |
| "Tune perplexity and learning rate, or increase iterations", | |
| "Switch to raw data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Hyperparameter tuning is key to achieving better separation in t-SNE visualizations." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "Scenario: t-SNE visualization is chaotic. Possible reasons?", | |
| "options": [ | |
| "High learning rate, low perplexity, random initialization", | |
| "PCA used for pre-reduction", | |
| "Data normalization applied", | |
| "Output dimension too large" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper hyperparameters and random initialization can produce poor or chaotic t-SNE plots." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Scenario: You reduce embeddings to 2D with t-SNE, but clusters not apparent. Next step?", | |
| "options": [ | |
| "Increase dataset size", | |
| "Change output to 1D", | |
| "Use raw data only", | |
| "Adjust perplexity, learning rate, or perform PCA first" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hyperparameter tuning and preprocessing like PCA can help reveal clusters in t-SNE plots." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Scenario: t-SNE applied to 300-dimensional image embeddings, some clusters scattered. Likely reason?", | |
| "options": [ | |
| "All of the above", | |
| "Learning rate too low", | |
| "High-dimensional noise, consider PCA pre-reduction", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Noise and improper hyperparameters can scatter clusters; preprocessing and tuning are essential." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "t-SNE preserves local distances by converting pairwise distances to:", | |
| "options": [ | |
| "Probabilities using Gaussian in high-d and t-distribution in low-d", | |
| "Manhattan distance only", | |
| "Euclidean distances only", | |
| "Cosine similarity only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Pairwise distances are converted to conditional probabilities in high-d, and Student’s t-distribution in low-d preserves local similarity." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Scenario: You run t-SNE multiple times and get slightly different plots. How to improve consistency?", | |
| "options": [ | |
| "Increase output dimension to 5D", | |
| "Decrease dataset size", | |
| "Normalize labels only", | |
| "Use PCA initialization and fix random seed" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "PCA initialization and fixing random seed reduce variability in t-SNE visualization." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Scenario: t-SNE produces compressed clusters in center. Likely cause?", | |
| "options": [ | |
| "Crowding problem in low-dimensional space", | |
| "Algorithm failure", | |
| "Learning rate too low", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The crowding problem arises because high-dimensional neighborhoods cannot be perfectly represented in low-dimensional space, causing compression." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "Scenario: You apply t-SNE on 10,000 image embeddings and clusters appear noisy. Which is the best approach?", | |
| "options": [ | |
| "Use PCA to reduce dimensions before t-SNE and tune perplexity", | |
| "Increase learning rate to maximum", | |
| "Use raw pixel values directly", | |
| "Reduce output dimensions to 1D" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA pre-reduction reduces noise and dimensionality, improving t-SNE visualization on large datasets." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Scenario: t-SNE on text embeddings shows overlapping topics. Likely cause?", | |
| "options": [ | |
| "Perplexity too low or high, or insufficient iterations", | |
| "Data normalization applied", | |
| "Output dimension too high", | |
| "Embedding size too small" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Hyperparameter tuning is essential; low/high perplexity or insufficient iterations can cause overlapping clusters." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "Scenario: Two clusters in high-dimensional space appear merged in t-SNE plot. What can you do?", | |
| "options": [ | |
| "Adjust perplexity, learning rate, or use PCA initialization", | |
| "Increase output dimension to 5D", | |
| "Normalize labels", | |
| "Reduce dataset size randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Proper hyperparameter tuning and PCA initialization can help separate clusters that appear merged in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: Running t-SNE on genomic data, you notice small clusters isolated. Reason?", | |
| "options": [ | |
| "Perplexity may be low, emphasizing very local neighborhoods", | |
| "High learning rate", | |
| "Output dimension too high", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low perplexity focuses on very local neighborhoods, potentially isolating small clusters in visualization." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "Scenario: t-SNE produces different visualizations on repeated runs. How to stabilize?", | |
| "options": [ | |
| "Use PCA initialization and fix random seed", | |
| "Reduce dataset size", | |
| "Increase output dimension beyond 3D", | |
| "Use raw data without scaling" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA initialization and setting a fixed random seed reduce stochastic variation in t-SNE results." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: t-SNE on high-dimensional sensor data shows tight clusters but global distances are distorted. Interpretation?", | |
| "options": [ | |
| "Local structure preserved; global distances are not maintained", | |
| "Algorithm failed", | |
| "Data incorrectly scaled", | |
| "Output dimension wrong" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE preserves local pairwise relationships; global distances can appear distorted in 2D/3D visualization." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Scenario: Clusters appear fragmented after t-SNE on customer embeddings. Likely reason?", | |
| "options": [ | |
| "Perplexity too low", | |
| "Learning rate too high", | |
| "Data normalization missing", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Low perplexity, high learning rate, or improper feature scaling can fragment clusters in t-SNE plots." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: Applying t-SNE to visualize embeddings after deep learning model training. Best preprocessing?", | |
| "options": [ | |
| "Normalize features and optionally use PCA to reduce dimensions", | |
| "Use raw embeddings directly", | |
| "Randomly shuffle dimensions", | |
| "Use first two features only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Normalization and PCA pre-reduction enhance t-SNE visualization quality for deep embeddings." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Scenario: Clusters appear compressed in center of t-SNE plot. Likely cause?", | |
| "options": [ | |
| "Crowding problem inherent to low-dimensional mapping", | |
| "Algorithm failed", | |
| "Perplexity too high", | |
| "Learning rate too low" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Crowding problem occurs because high-dimensional neighborhoods cannot be perfectly represented in low dimensions, causing compression." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Scenario: After t-SNE, similar data points are far apart in 2D. Likely reason?", | |
| "options": [ | |
| "Hyperparameters not tuned correctly", | |
| "Data normalization failed", | |
| "Output dimension too high", | |
| "Labels missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Incorrect perplexity, learning rate, or insufficient iterations can cause similar points to appear far apart." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: t-SNE on embeddings shows elongated clusters. Best action?", | |
| "options": [ | |
| "Adjust perplexity and learning rate, or increase iterations", | |
| "Reduce output dimension to 1D", | |
| "Use raw embeddings without scaling", | |
| "Remove random points" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster elongation often occurs due to suboptimal hyperparameters; tuning and more iterations can improve results." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Scenario: Visualizing 50,000 text embeddings with t-SNE is very slow. Solution?", | |
| "options": [ | |
| "Use FIt-SNE or Barnes-Hut t-SNE for faster computation", | |
| "Reduce perplexity to 1", | |
| "Use raw text instead of embeddings", | |
| "Decrease output dimensions to 1D" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Optimized t-SNE implementations significantly speed up visualization of large datasets." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: t-SNE clusters overlap despite tuning. Next step?", | |
| "options": [ | |
| "Consider alternative dimensionality reduction methods like UMAP", | |
| "Reduce output dimensions further", | |
| "Use raw data without embeddings", | |
| "Remove labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If t-SNE cannot separate clusters even after tuning, UMAP or other DR methods might better preserve structure." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Scenario: t-SNE on protein expression data shows some scattered clusters. Likely cause?", | |
| "options": [ | |
| "Noise in high-dimensional data", | |
| "Insufficient iterations", | |
| "Suboptimal hyperparameters", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Noise and suboptimal hyperparameters can cause scattered clusters; preprocessing and tuning help visualization." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: You apply t-SNE after PCA to reduce 500D embeddings to 50D. Why?", | |
| "options": [ | |
| "Reduce computation and noise while retaining important variance", | |
| "Increase global distance preservation", | |
| "Generate labels automatically", | |
| "Visualize in 3D directly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA pre-reduction helps t-SNE efficiently process high-dimensional data while keeping meaningful structure." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Scenario: Clusters appear stretched along a single axis. Likely cause?", | |
| "options": [ | |
| "Perplexity too high or learning rate too low", | |
| "Algorithm failure", | |
| "Data normalization missing", | |
| "Incorrect output dimension" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper hyperparameters can cause cluster elongation in low-dimensional embeddings." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: t-SNE applied to 300-dimensional embeddings of customer behavior. Output 2D. What can distort clusters?", | |
| "options": [ | |
| "Random initialization, hyperparameters, noisy features", | |
| "Data scaling applied", | |
| "Output dimension too high", | |
| "Label missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster distortion occurs due to noise, initialization randomness, and hyperparameter settings." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Scenario: t-SNE output differs between runs. Best practice to make consistent?", | |
| "options": [ | |
| "Fix random seed and use PCA initialization", | |
| "Use raw data directly", | |
| "Reduce output dimension to 1D", | |
| "Change KL divergence formula" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fixed random seed and PCA initialization reduce stochastic variation across runs." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: t-SNE applied to embeddings shows overlapping clusters, perplexity set to 5. Recommendation?", | |
| "options": [ | |
| "Increase perplexity to consider more neighbors", | |
| "Reduce learning rate", | |
| "Decrease output dimension to 1D", | |
| "Use raw high-dimensional features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low perplexity can underrepresent neighborhood structure; increasing perplexity may separate clusters better." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: After t-SNE, similar embeddings appear scattered. Likely hyperparameter issue?", | |
| "options": [ | |
| "Learning rate too high or perplexity not optimal", | |
| "Output dimension too high", | |
| "Labels missing", | |
| "Data normalized incorrectly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper learning rate or perplexity can scatter similar points, reducing visualization quality." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Scenario: t-SNE shows different cluster sizes for similar data. Why?", | |
| "options": [ | |
| "Local density differences and crowding problem in low-dimensional space", | |
| "Algorithm failure", | |
| "Incorrect output dimension", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE preserves local structure; dense regions appear larger and sparse regions smaller due to crowding problem." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: You want faster t-SNE on 100,000 points. Recommendation?", | |
| "options": [ | |
| "Use Barnes-Hut or FIt-SNE approximation", | |
| "Reduce output dimension to 1D", | |
| "Use raw data without scaling", | |
| "Decrease perplexity to 1" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Optimized t-SNE versions reduce computation and memory for large datasets." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: t-SNE shows elongated clusters. Likely hyperparameter adjustment?", | |
| "options": [ | |
| "Adjust perplexity and learning rate, or increase iterations", | |
| "Reduce dataset size", | |
| "Use raw data", | |
| "Remove features randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster elongation often occurs due to improper hyperparameters; tuning can improve visualization." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: t-SNE on embedding shows isolated points far from clusters. Likely reason?", | |
| "options": [ | |
| "Outliers or low perplexity emphasizing local neighborhoods", | |
| "Algorithm failure", | |
| "Output dimension too high", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Outliers or very low perplexity can cause points to appear isolated in visualization." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: You want t-SNE results reproducible across runs. Steps?", | |
| "options": [ | |
| "Fix random seed, use PCA initialization, standardize features", | |
| "Increase output dimensions", | |
| "Reduce dataset size", | |
| "Use raw data" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reproducibility requires controlling randomness and preprocessing consistently." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: t-SNE applied to multi-class embeddings, some classes overlap. Best solution?", | |
| "options": [ | |
| "Tune perplexity, learning rate, or try PCA initialization", | |
| "Reduce number of classes", | |
| "Change output to 1D", | |
| "Use raw features without preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Hyperparameter tuning and PCA initialization often improve cluster separation for multi-class data." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: t-SNE visualization shows tight clusters compressed together. Likely cause?", | |
| "options": [ | |
| "Crowding problem and insufficient perplexity", | |
| "Algorithm failure", | |
| "Output dimension too high", | |
| "Data not normalized" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Crowding problem causes clusters to compress in low-dimensional space, especially with suboptimal perplexity." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: You apply t-SNE on embeddings with high noise. Recommended step?", | |
| "options": [ | |
| "Denoise or reduce dimensionality with PCA before t-SNE", | |
| "Use raw embeddings", | |
| "Reduce output dimension to 1D", | |
| "Increase iterations without preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Preprocessing helps t-SNE focus on meaningful structure rather than noise." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: t-SNE visualization shows variable cluster shapes between runs. Solution?", | |
| "options": [ | |
| "Fix random seed and use PCA initialization", | |
| "Reduce dataset size", | |
| "Change output dimension to 1D", | |
| "Normalize labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fixing seed and PCA initialization stabilizes t-SNE output across runs." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: t-SNE applied to 500D embeddings, some clusters overlapping. Recommended approach?", | |
| "options": [ | |
| "Try PCA pre-reduction, adjust perplexity and learning rate, increase iterations", | |
| "Reduce output dimension to 1D", | |
| "Use raw features", | |
| "Remove labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Proper preprocessing and hyperparameter tuning help t-SNE separate overlapping clusters." | |
| } | |
| ] | |
| } | |