diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e806effac..0abdd84271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * [FEATURE] Continuous-test: now runnable as a module with `mimir -target=continuous-test`. #7747 * [FEATURE] Store-gateway: Allow specific tenants to be enabled or disabled via `-store-gateway.enabled-tenants` or `-store-gateway.disabled-tenants` CLI flags or their corresponding YAML settings. #7653 * [FEATURE] New `-.s3.bucket-lookup-type` flag configures the bucket lookup style used to access buckets in S3-compatible providers. #7684 +* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 * [FEATURE] Server: added experimental [PROXY protocol support](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt). The PROXY protocol support can be enabled via `-server.proxy-protocol-enabled=true`. When enabled, the support is added both to HTTP and gRPC listening ports. #7698 * [ENHANCEMENT] Store-gateway: merge series from different blocks concurrently. #7456 * [ENHANCEMENT] Store-gateway: Add `stage="wait_max_concurrent"` to `cortex_bucket_store_series_request_stage_duration_seconds` which records how long the query had to wait for its turn for `-blocks-storage.bucket-store.max-concurrent`. #7609 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 3dfd80d133..3308962c7d 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -1876,6 +1876,17 @@ "fieldType": "duration", "fieldCategory": "advanced" }, + { + "kind": "field", + "name": "promql_engine", + "required": false, + "desc": "PromQL engine to use, either 'standard' or 'streaming'", + "fieldValue": null, + "fieldDefaultValue": "standard", + "fieldFlag": "querier.promql-engine", + "fieldType": "string", + "fieldCategory": "experimental" + }, { "kind": "field", "name": "max_concurrent", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index aa9d42dede..11170e87fb 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1725,6 +1725,8 @@ Usage of ./cmd/mimir/mimir: Delay before initiating requests to further ingesters when request minimization is enabled and the initially selected set of ingesters have not all responded. Ignored if -querier.minimize-ingester-requests is not enabled. (default 3s) -querier.prefer-streaming-chunks-from-store-gateways [experimental] Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this. + -querier.promql-engine string + [experimental] PromQL engine to use, either 'standard' or 'streaming' (default "standard") -querier.promql-experimental-functions-enabled [experimental] Enable experimental PromQL functions. This config option should be set on query-frontend too when query sharding is enabled.
-querier.query-ingesters-within duration diff --git a/docs/sources/mimir/configure/about-versioning.md b/docs/sources/mimir/configure/about-versioning.md index 63634156bc..8e95ff9ed5 100644 --- a/docs/sources/mimir/configure/about-versioning.md +++ b/docs/sources/mimir/configure/about-versioning.md @@ -126,6 +126,7 @@ The following features are currently experimental: - Maximum response size for active series queries (`-querier.active-series-results-max-size-bytes`) - Enable PromQL experimental functions (`-querier.promql-experimental-functions-enabled`) - Allow streaming of `/active_series` responses to the frontend (`-querier.response-streaming-enabled`) + - Streaming PromQL engine (`-querier.promql-engine=streaming`) - Query-frontend - `-query-frontend.querier-forget-delay` - Instant query splitting (`-query-frontend.split-instant-queries-by-interval`) diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index de69c65af1..a0e5b722c0 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -1325,6 +1325,10 @@ store_gateway_client: # CLI flag: -querier.minimize-ingester-requests-hedging-delay [minimize_ingester_requests_hedging_delay: <duration> | default = 3s] +# (experimental) PromQL engine to use, either 'standard' or 'streaming' +# CLI flag: -querier.promql-engine +[promql_engine: <string> | default = "standard"] + # The number of workers running in each querier process. This setting limits the # maximum number of concurrent queries in each querier. # CLI flag: -querier.max-concurrent diff --git a/integration/querier_test.go b/integration/querier_test.go index 46c17cb39c..b1d7214caa 100644 --- a/integration/querier_test.go +++ b/integration/querier_test.go @@ -514,6 +514,52 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) { } } +func TestStreamingPromQLEngine(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + flags := mergeFlags(BlocksStorageFlags(), BlocksStorageS3Flags(), map[string]string{ + "-querier.promql-engine": "streaming", + }) + + consul := e2edb.NewConsul() + minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"]) + require.NoError(t, s.StartAndWaitReady(consul, minio)) + + distributor := e2emimir.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), flags) + ingester := e2emimir.NewIngester("ingester", consul.NetworkHTTPEndpoint(), flags) + querier := e2emimir.NewQuerier("querier", consul.NetworkHTTPEndpoint(), flags) + require.NoError(t, s.StartAndWaitReady(distributor, ingester, querier)) + + // Wait until the distributor and querier have updated the ring. + // The distributor should have 512 tokens for the ingester ring and 1 for the distributor ring, + // and the querier should have 512 tokens for the ingester ring. + require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512+1), "cortex_ring_tokens_total")) + require.NoError(t, querier.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total")) + + // Push a series to Mimir.
+ writeClient, err := e2emimir.NewClient(distributor.HTTPEndpoint(), "", "", "", "user-1") + require.NoError(t, err) + + seriesName := "series_1" + seriesTimestamp := time.Now() + series, expectedVector, _ := generateFloatSeries(seriesName, seriesTimestamp, prompb.Label{Name: seriesName, Value: seriesName}) + + res, err := writeClient.Push(series) + require.NoError(t, err) + require.Equal(t, 200, res.StatusCode) + + // Query back the same series using the streaming PromQL engine. + c, err := e2emimir.NewClient("", querier.HTTPEndpoint(), "", "", "user-1") + require.NoError(t, err) + + result, err := c.Query(seriesName, seriesTimestamp) + require.NoError(t, err) + require.Equal(t, model.ValVector, result.Type()) + assert.Equal(t, expectedVector, result.(model.Vector)) +} + func testMetadataQueriesWithBlocksStorage( t *testing.T, c *e2emimir.Client, diff --git a/pkg/api/handlers.go b/pkg/api/handlers.go index 45a0b941d6..b1cfd1529e 100644 --- a/pkg/api/handlers.go +++ b/pkg/api/handlers.go @@ -212,7 +212,7 @@ func NewQuerierHandler( queryable storage.SampleAndChunkQueryable, exemplarQueryable storage.ExemplarQueryable, metadataSupplier querier.MetadataSupplier, - engine *promql.Engine, + engine promql.QueryEngine, distributor Distributor, reg prometheus.Registerer, logger log.Logger, diff --git a/pkg/mimir/mimir.go b/pkg/mimir/mimir.go index 6b42d74839..0780b9b80b 100644 --- a/pkg/mimir/mimir.go +++ b/pkg/mimir/mimir.go @@ -709,7 +709,7 @@ type Mimir struct { QuerierQueryable prom_storage.SampleAndChunkQueryable ExemplarQueryable prom_storage.ExemplarQueryable MetadataSupplier querier.MetadataSupplier - QuerierEngine *promql.Engine + QuerierEngine promql.QueryEngine QueryFrontendTripperware querymiddleware.Tripperware QueryFrontendCodec querymiddleware.Codec Ruler *ruler.Ruler diff --git a/pkg/mimir/mimir_test.go b/pkg/mimir/mimir_test.go index dde93f51e0..4dd6676e53 100644 --- a/pkg/mimir/mimir_test.go +++ b/pkg/mimir/mimir_test.go @@ -45,6 +45,7 @@ import ( "github.com/grafana/mimir/pkg/distributor" "github.com/grafana/mimir/pkg/frontend/v1/frontendv1pb" "github.com/grafana/mimir/pkg/ingester" + "github.com/grafana/mimir/pkg/querier" "github.com/grafana/mimir/pkg/ruler" "github.com/grafana/mimir/pkg/ruler/rulestore" "github.com/grafana/mimir/pkg/scheduler/schedulerpb" @@ -161,6 +162,9 @@ func TestMimir(t *testing.T) { ReplicationFactor: 1, InstanceInterfaceNames: []string{"en0", "eth0", "lo0", "lo"}, }}, + Querier: querier.Config{ + PromQLEngine: "standard", + }, } require.NoError(t, cfg.Server.LogLevel.Set("info")) diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index fb55498fd9..354d480a41 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -500,9 +500,12 @@ func (t *Mimir) initQueryable() (serv services.Service, err error) { registerer := prometheus.WrapRegistererWith(querierEngine, t.Registerer) // Create a querier queryable and PromQL engine - t.QuerierQueryable, t.ExemplarQueryable, t.QuerierEngine = querier.New( + t.QuerierQueryable, t.ExemplarQueryable, t.QuerierEngine, err = querier.New( t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryable, registerer, util_log.Logger, t.ActivityTracker, ) + if err != nil { + return nil, fmt.Errorf("could not create queryable: %w", err) + } // Use the distributor to return metric metadata by default t.MetadataSupplier = t.Distributor @@ -842,7 +845,11 @@ func (t *Mimir) initRuler() (serv services.Service, err error) { // TODO: Consider wrapping logger to differentiate from querier module logger rulerRegisterer := 
prometheus.WrapRegistererWith(rulerEngine, t.Registerer) - queryable, _, eng := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryable, rulerRegisterer, util_log.Logger, t.ActivityTracker) + queryable, _, eng, err := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryable, rulerRegisterer, util_log.Logger, t.ActivityTracker) + if err != nil { + return nil, fmt.Errorf("could not create queryable for ruler: %w", err) + } + queryable = querier.NewErrorTranslateQueryableWithFn(queryable, ruler.WrapQueryableErrors) if t.Cfg.Ruler.TenantFederation.Enabled { diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 4ac5a89fc9..06dd79cce6 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -29,6 +29,7 @@ import ( "github.com/grafana/mimir/pkg/querier/stats" "github.com/grafana/mimir/pkg/storage/chunk" "github.com/grafana/mimir/pkg/storage/lazyquery" + "github.com/grafana/mimir/pkg/streamingpromql" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/activitytracker" "github.com/grafana/mimir/pkg/util/limiter" @@ -55,12 +56,16 @@ type Config struct { MinimizeIngesterRequests bool `yaml:"minimize_ingester_requests" category:"advanced"` MinimiseIngesterRequestsHedgingDelay time.Duration `yaml:"minimize_ingester_requests_hedging_delay" category:"advanced"` + PromQLEngine string `yaml:"promql_engine" category:"experimental"` + // PromQL engine config. EngineConfig engine.Config `yaml:",inline"` } const ( - queryStoreAfterFlag = "querier.query-store-after" + queryStoreAfterFlag = "querier.query-store-after" + standardPromQLEngine = "standard" + streamingPromQLEngine = "streaming" ) // RegisterFlags adds the flags required to config this to the given FlagSet. @@ -82,10 +87,16 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Uint64Var(&cfg.StreamingChunksPerIngesterSeriesBufferSize, "querier.streaming-chunks-per-ingester-buffer-size", 256, "Number of series to buffer per ingester when streaming chunks from ingesters.") f.Uint64Var(&cfg.StreamingChunksPerStoreGatewaySeriesBufferSize, "querier.streaming-chunks-per-store-gateway-buffer-size", 256, "Number of series to buffer per store-gateway when streaming chunks from store-gateways.") + f.StringVar(&cfg.PromQLEngine, "querier.promql-engine", standardPromQLEngine, fmt.Sprintf("PromQL engine to use, either '%v' or '%v'", standardPromQLEngine, streamingPromQLEngine)) + cfg.EngineConfig.RegisterFlags(f) } func (cfg *Config) Validate() error { + if cfg.PromQLEngine != standardPromQLEngine && cfg.PromQLEngine != streamingPromQLEngine { + return fmt.Errorf("unknown PromQL engine '%s'", cfg.PromQLEngine) + } + return nil } @@ -123,7 +134,7 @@ func ShouldQueryBlockStore(queryStoreAfter time.Duration, now time.Time, queryMi } // New builds a queryable and promql engine. 
-func New(cfg Config, limits *validation.Overrides, distributor Distributor, storeQueryable storage.Queryable, reg prometheus.Registerer, logger log.Logger, tracker *activitytracker.ActivityTracker) (storage.SampleAndChunkQueryable, storage.ExemplarQueryable, *promql.Engine) { +func New(cfg Config, limits *validation.Overrides, distributor Distributor, storeQueryable storage.Queryable, reg prometheus.Registerer, logger log.Logger, tracker *activitytracker.ActivityTracker) (storage.SampleAndChunkQueryable, storage.ExemplarQueryable, promql.QueryEngine, error) { queryMetrics := stats.NewQueryMetrics(reg) distributorQueryable := newDistributorQueryable(distributor, limits, queryMetrics, logger) @@ -139,13 +150,28 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor return lazyquery.NewLazyQuerier(querier), nil }) - engineOpts, engineExperimentalFunctionsEnabled := engine.NewPromQLEngineOptions(cfg.EngineConfig, tracker, logger, reg) - engine := promql.NewEngine(engineOpts) + opts, engineExperimentalFunctionsEnabled := engine.NewPromQLEngineOptions(cfg.EngineConfig, tracker, logger, reg) // Experimental functions can only be enabled globally, and not on a per-engine basis. parser.EnableExperimentalFunctions = engineExperimentalFunctionsEnabled - return NewSampleAndChunkQueryable(lazyQueryable), exemplarQueryable, engine + var eng promql.QueryEngine + + switch cfg.PromQLEngine { + case standardPromQLEngine: + eng = promql.NewEngine(opts) + case streamingPromQLEngine: + var err error + + eng, err = streamingpromql.NewEngine(opts) + if err != nil { + return nil, nil, nil, err + } + default: + panic(fmt.Sprintf("invalid config not caught by validation: unknown PromQL engine '%s'", cfg.PromQLEngine)) + } + + return NewSampleAndChunkQueryable(lazyQueryable), exemplarQueryable, eng, nil } // NewSampleAndChunkQueryable creates a SampleAndChunkQueryable from a Queryable. 
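Both engines satisfy Prometheus' `promql.QueryEngine` interface, which is what allows the `handlers.go` and `mimir.go` changes above to replace the concrete `*promql.Engine` type with the interface. A minimal sketch of what this buys callers (illustrative only, not part of this diff: `runRangeQuery` is a hypothetical helper, and the engine and queryable arguments stand in for the values returned by `querier.New`):

```go
package example

import (
	"context"
	"time"

	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/storage"
)

// runRangeQuery behaves identically whether eng is Prometheus' standard engine
// or the streaming engine, because both satisfy promql.QueryEngine.
func runRangeQuery(ctx context.Context, eng promql.QueryEngine, queryable storage.Queryable, expr string, start, end time.Time) (promql.Matrix, error) {
	qry, err := eng.NewRangeQuery(ctx, queryable, nil, expr, start, end, time.Minute)
	if err != nil {
		return nil, err
	}
	defer qry.Close()

	res := qry.Exec(ctx)
	if res.Err != nil {
		return nil, res.Err
	}

	return res.Matrix() // Range queries evaluate to a matrix.
}
```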
diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index df95d36e0e..49de21b490 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -221,7 +221,9 @@ func TestQuerier(t *testing.T) { overrides, err := validation.NewOverrides(defaultLimitsConfig(), nil) require.NoError(t, err) - queryable, _, _ := New(cfg, overrides, distributor, db, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, db, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + testRangeQuery(t, queryable, through, q) }) } @@ -297,7 +299,9 @@ func TestQuerier_QueryableReturnsChunksOutsideQueriedRange(t *testing.T) { Timeout: 1 * time.Minute, }) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, logger, nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, logger, nil) + require.NoError(t, err) + ctx := user.InjectOrgID(context.Background(), "user-1") query, err := engine.NewRangeQuery(ctx, queryable, nil, `sum({__name__=~".+"})`, queryStart, queryEnd, queryStep) require.NoError(t, err) @@ -382,7 +386,9 @@ func TestBatchMergeChunks(t *testing.T) { Timeout: 1 * time.Minute, }) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, logger, nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, logger, nil) + require.NoError(t, err) + ctx := user.InjectOrgID(context.Background(), "user-1") query, err := engine.NewRangeQuery(ctx, queryable, nil, `rate({__name__=~".+"}[10s])`, queryStart, queryEnd, queryStep) require.NoError(t, err) @@ -452,7 +458,9 @@ func BenchmarkQueryExecute(b *testing.B) { Timeout: 1 * time.Minute, }) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, logger, nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, logger, nil) + require.NoError(b, err) + ctx := user.InjectOrgID(context.Background(), "user-1") b.Run(name, func(b *testing.B) { @@ -597,7 +605,7 @@ func TestQuerier_QueryIngestersWithinConfig(t *testing.T) { MaxSamples: 1e6, Timeout: 1 * time.Minute, }) - cfg := Config{} + cfg := Config{PromQLEngine: standardPromQLEngine} for _, c := range testCases { t.Run(c.name, func(t *testing.T) { distributor := &errDistributor{} @@ -610,7 +618,8 @@ func TestQuerier_QueryIngestersWithinConfig(t *testing.T) { // block storage will not be hit; provide nil querier var storeQueryable storage.Queryable - queryable, _, _ := New(cfg, overrides, distributor, storeQueryable, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, storeQueryable, nil, log.NewNopLogger(), nil) + require.NoError(t, err) ctx := user.InjectOrgID(context.Background(), "0") query, err := engine.NewRangeQuery(ctx, queryable, nil, "dummy", c.mint, c.maxt, 1*time.Minute) require.NoError(t, err) @@ -694,7 +703,9 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryIntoFuture(t *testing.T) { overrides, err := validation.NewOverrides(defaultLimitsConfig(), nil) require.NoError(t, err) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + ctx := user.InjectOrgID(context.Background(), "0") query, err := engine.NewRangeQuery(ctx, queryable, nil, "dummy", c.queryStartTime, c.queryEndTime, time.Minute) require.NoError(t, err) @@ -766,7 +777,8 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLength(t *testing.T) { // We don't need to query any data for this test, so an empty distributor is 
fine. distributor := &emptyDistributor{} - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) // Create the PromQL engine to execute the query. engine := promql.NewEngine(promql.EngineOpts{ @@ -887,7 +899,8 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLookback(t *testing.T) { distributor.On("Query", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(model.Matrix{}, nil) distributor.On("QueryStream", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(client.CombinedQueryStreamResponse{}, nil) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) query, err := engine.NewRangeQuery(ctx, queryable, nil, testData.query, testData.queryStartTime, testData.queryEndTime, time.Minute) require.NoError(t, err) @@ -914,7 +927,9 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLookback(t *testing.T) { distributor := &mockDistributor{} distributor.On("MetricsForLabelMatchers", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]labels.Labels{}, nil) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + q, err := queryable.Querier(util.TimeToMillis(testData.queryStartTime), util.TimeToMillis(testData.queryEndTime)) require.NoError(t, err) @@ -949,7 +964,9 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLookback(t *testing.T) { distributor := &mockDistributor{} distributor.On("LabelNames", mock.Anything, mock.Anything, mock.Anything, matchers).Return([]string{}, nil) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + q, err := queryable.Querier(util.TimeToMillis(testData.queryStartTime), util.TimeToMillis(testData.queryEndTime)) require.NoError(t, err) @@ -975,7 +992,9 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLookback(t *testing.T) { distributor := &mockDistributor{} distributor.On("LabelValuesForLabelName", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]string{}, nil) - queryable, _, _ := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, nil, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + q, err := queryable.Querier(util.TimeToMillis(testData.queryStartTime), util.TimeToMillis(testData.queryEndTime)) require.NoError(t, err) @@ -1045,7 +1064,9 @@ func TestQuerier_MaxLabelsQueryRange(t *testing.T) { distributor := &mockDistributor{} distributor.On("MetricsForLabelMatchers", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]labels.Labels{}, nil) - queryable, _, _ := New(cfg, overrides, distributor, storeQueryable, nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, storeQueryable, nil, log.NewNopLogger(), nil) + require.NoError(t, err) + q, err := queryable.Querier(util.TimeToMillis(testData.queryStartTime), util.TimeToMillis(testData.queryEndTime)) require.NoError(t, err) @@ -1248,7 +1269,8 @@ func 
TestQuerier_QueryStoreAfterConfig(t *testing.T) { querier := &mockBlocksStorageQuerier{} querier.On("Select", mock.Anything, true, mock.Anything, expectedMatchers).Return(storage.EmptySeriesSet()) - queryable, _, _ := New(cfg, overrides, distributor, newMockBlocksStorageQueryable(querier), nil, log.NewNopLogger(), nil) + queryable, _, _, err := New(cfg, overrides, distributor, newMockBlocksStorageQueryable(querier), nil, log.NewNopLogger(), nil) + require.NoError(t, err) ctx := user.InjectOrgID(context.Background(), "0") query, err := engine.NewRangeQuery(ctx, queryable, nil, "metric", c.mint, c.maxt, 1*time.Minute) require.NoError(t, err) diff --git a/pkg/streamingpromql/README.md b/pkg/streamingpromql/README.md new file mode 100644 index 0000000000..cc10eeebb2 --- /dev/null +++ b/pkg/streamingpromql/README.md @@ -0,0 +1,97 @@ +# Streaming PromQL engine + +This file contains a brief overview of the internals of the streaming PromQL engine. + +For an introduction to the engine itself and the problems it tries to solve, check out [this PromCon 2023 talk](https://www.youtube.com/watch?v=3kM2Asj6hcg). + +The goal of the engine is to allow evaluating queries over millions of series in a safe, performant and cost-effective way. +To achieve this, the engine aims to ensure that peak memory consumption of queriers is not proportional to the number of series selected. +This will make it safe for operators to loosen the various query-related limits without risking the stability of their Mimir cluster or needing to devote enormous amounts of compute resources to queriers. + +The key way the engine achieves this is by not loading all the input series into memory at once, and instead streaming them into memory when needed. + +For example, let's say we're evaluating the query `sum by (environment) (some_metric{cluster="cluster-1"})`. + +Prometheus' PromQL engine will first load all samples for all series selected by `some_metric{cluster="cluster-1"}` into memory. +It will then compute the sum for each unique value of `environment`. +At its peak, Prometheus' PromQL engine will hold all samples for all input series (from `some_metric{cluster="cluster-1"}`) and all samples for all output series in memory at once. + +The streaming engine will instead execute the selector `some_metric{cluster="cluster-1"}` and gather the labels of all series returned. +With these labels, it will then compute all the possible output series for the `sum by (environment)` operation (i.e. one output series per unique value of `environment`). +Having computed the output series, it will then begin reading series from the selector, one at a time, and update the running total for the appropriate output series. +At its peak, the streaming engine in this example will hold all samples for one input series and all samples for all output series in memory at once[^1], +a significant reduction compared to Prometheus' PromQL engine, particularly when the selector selects many series. + +This idea of streaming can be applied at multiple levels as well. Imagine we're evaluating the query `max(sum by (environment) (some_metric{cluster="cluster-1"}))`. +In the streaming engine, once the result of each group series produced by `sum` is complete, it is passed to `max`, which can update its running maximum seen so far across all groups. +At its peak, the streaming engine will hold all samples for one input series, all samples for all incomplete `sum` group series, and the single incomplete `max` output series in memory at once.
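+
+As a rough sketch of this approach (illustrative only, not the engine's actual code), here is how a simplified `sum by (environment)` could consume the `InstantVectorOperator` interface described under "Internals" below. The real engine keeps a separate running sum per output time step; this sketch collapses that detail into a single total per group.
+
+```go
+// Assume this lives in the operator package, with only "context" imported.
+func sumByEnvironment(ctx context.Context, selector InstantVectorOperator) (map[string]float64, error) {
+	// Labels for every input series, in the same order Next will return them.
+	metadata, err := selector.SeriesMetadata(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	sums := map[string]float64{}
+
+	for _, m := range metadata {
+		// Only this one series' samples are in memory at a time.
+		series, err := selector.Next(ctx)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, point := range series.Floats {
+			sums[m.Labels.Get("environment")] += point.F
+		}
+	}
+
+	return sums, nil
+}
+```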
+ +## Internals + +Within the streaming engine, a query is represented by a set of linked operators (one for each operation) that together form the query plan. + +For example, the `max(sum by (environment) (some_metric{cluster="cluster-1"}))` example from before would have a query plan made up of three operators: + +- The instant vector selector operator (`some_metric{cluster="cluster-1"}`) +- The `sum` aggregation operator (`sum by (environment) (...)`), which consumes series from the instant vector selector operator +- The `max` aggregation operator (`max (...)`), which consumes series from the `sum` aggregation operator + +Visually, the plan looks like this: + +```mermaid +flowchart TB + IVS["`**instant vector selector** + some_metric#123;cluster=#quot;cluster-1#quot;#125;`"] + sum["`**sum aggregation** + sum by (environment) (...)`"] + max["`**max aggregation** + max (...)`"] + output((output)) + IVS --> sum + sum --> max + max --> output +``` + +Each of these operators satisfies the `InstantVectorOperator` interface, defined [here](./operator/operator.go). +The two key methods of this interface are `SeriesMetadata()` and `Next()`: + +`SeriesMetadata()` returns the list of all series' labels that will be returned by the operator[^2]. +In our example, the instant vector selector operator would return all the matching `some_metric` series, and the `sum` aggregation operator would return one series for each unique value of `environment`. + +`Next()` is then called by the consuming operator to read each series' data, one series at a time. +In our example, the `sum` aggregation operator would call `Next()` on the instant vector selector operator to get the first series' data, then again to get the second series' data and so on. + +Elaborating on the example from before, the overall query would proceed like this, assuming the request is received over HTTP: + +1. query HTTP API handler calls `Engine.NewInstantQuery()` or `Engine.NewRangeQuery()` as appropriate ([source](./engine.go)) + 1. engine parses PromQL expression using Prometheus' PromQL parser, producing an abstract syntax tree (AST) ([source](./query.go)) + 1. engine converts AST produced by PromQL parser to query plan ([source](./query.go)) + 1. engine returns created `Query` instance +1. query HTTP API handler calls `Query.Exec()` + 1. `Query.Exec()` calls `SeriesMetadata()` on `max` aggregation operator + 1. `max` aggregation operator calls `SeriesMetadata()` on `sum` aggregation operator + 1. `sum` aggregation operator calls `SeriesMetadata()` on instant vector selector operator + - instant vector selector operator issues `Select()` call, which retrieves labels from ingesters and store-gateways + 1. `sum` aggregation operator computes output series (one per unique value of `environment`) based on input series from instant vector selector + 1. `max` aggregation operator computes output series based on input series from `sum` aggregation operator + - in this case, there's just one output series, given no grouping is being performed + 1. root of the query calls `Next()` on `max` aggregation operator until all series have been returned + 1. `max` aggregation operator calls `Next()` on `sum` aggregation operator + 1. `sum` aggregation operator calls `Next()` on instant vector selector operator + - instant vector selector returns samples for next series + 1. `sum` aggregation operator updates its running totals for the relevant output series + 1. 
if all input series have now been seen for the output series just updated, `sum` aggregation operator returns that output series and removes it from its internal state + 1. otherwise, it calls `Next()` again and repeats + 1. `max` aggregation operator updates its running maximum based on the series returned + 1. if all input series have been seen, `max` aggregation operator returns + 1. otherwise, it calls `Next()` again and repeats +1. query HTTP API handler converts returned result to wire format (either JSON or Protobuf) and sends to caller +1. query HTTP API handler calls `Query.Close()` to release remaining resources + +[^1]: + This isn't strictly correct, as chunks streaming will buffer chunks for some series in memory as they're received over the network, and it ignores the initial memory consumption caused by the non-streaming calls to `SeriesMetadata()`. + But this applies equally to both engines when used in Mimir. + +[^2]: + This isn't done in a streaming fashion: all series' labels are loaded into memory at once. + In a future iteration of the engine, `SeriesMetadata()` could be made streaming as well, but this is out of scope for now. diff --git a/pkg/streamingpromql/compare.sh b/pkg/streamingpromql/compare.sh new file mode 100755 index 0000000000..63f2ba6854 --- /dev/null +++ b/pkg/streamingpromql/compare.sh @@ -0,0 +1,17 @@ +#! /usr/bin/env bash +# SPDX-License-Identifier: AGPL-3.0-only +# This script compares benchmark results for the two engines. + +set -euo pipefail + +RESULTS_FILE="$1" # Should be the path to a file produced by a command like `go test -run=XXX -bench="BenchmarkQuery" -count=6 -benchmem -timeout=1h .` + +STANDARD_RESULTS_FILE=$(mktemp /tmp/standard.XXXX) +STREAMING_RESULTS_FILE=$(mktemp /tmp/streaming.XXXX) + +grep --invert-match "streaming-" "$RESULTS_FILE" | sed -E 's#/standard-[0-9]+##g' > "$STANDARD_RESULTS_FILE" +grep --invert-match "standard-" "$RESULTS_FILE" | sed -E 's#/streaming-[0-9]+##g' > "$STREAMING_RESULTS_FILE" + +benchstat "$STANDARD_RESULTS_FILE" "$STREAMING_RESULTS_FILE" | sed "s#$STANDARD_RESULTS_FILE# standard #g" | sed "s#$STREAMING_RESULTS_FILE# streaming #g" + +rm "$STANDARD_RESULTS_FILE" "$STREAMING_RESULTS_FILE" diff --git a/pkg/streamingpromql/comparison_test.go b/pkg/streamingpromql/comparison_test.go new file mode 100644 index 0000000000..2b650e2cb5 --- /dev/null +++ b/pkg/streamingpromql/comparison_test.go @@ -0,0 +1,446 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/util/teststorage/storage.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package streamingpromql + +import ( + "context" + "fmt" + "slices" + "strconv" + "strings" + "testing" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/require" +) + +type benchCase struct { + expr string + steps int +} + +func (c benchCase) Name() string { + name := c.expr + + if c.steps == 0 { + name += ", instant query" + } else if c.steps == 1 { + name += fmt.Sprintf(", range query with %d step", c.steps) + } else { + name += fmt.Sprintf(", range query with %d steps", c.steps) + } + + return name +} + +func (c benchCase) 
Run(ctx context.Context, t testing.TB, start, end time.Time, interval time.Duration, engine promql.QueryEngine, db *tsdb.DB) (*promql.Result, func()) { + var qry promql.Query + var err error + + if c.steps == 0 { + qry, err = engine.NewInstantQuery(ctx, db, nil, c.expr, start) + } else { + qry, err = engine.NewRangeQuery(ctx, db, nil, c.expr, start, end, interval) + } + + if err != nil { + require.NoError(t, err) + return nil, nil + } + + res := qry.Exec(ctx) + + if res.Err != nil { + require.NoError(t, res.Err) + return nil, nil + } + + return res, qry.Close +} + +// These test cases are taken from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go. +func testCases(metricSizes []int) []benchCase { + cases := []benchCase{ + // Plain retrieval. + { + expr: "a_X", + }, + // Simple rate. + { + expr: "rate(a_X[1m])", + }, + { + expr: "rate(a_X[1m])", + steps: 10000, + }, + //// Holt-Winters and long ranges. + //{ + // expr: "holt_winters(a_X[1d], 0.3, 0.3)", + //}, + //{ + // expr: "changes(a_X[1d])", + //}, + { + expr: "rate(a_X[1d])", + }, + //{ + // expr: "absent_over_time(a_X[1d])", + //}, + //// Unary operators. + //{ + // expr: "-a_X", + //}, + //// Binary operators. + //{ + // expr: "a_X - b_X", + //}, + //{ + // expr: "a_X - b_X", + // steps: 10000, + //}, + //{ + // expr: "a_X and b_X{l=~'.*[0-4]$'}", + //}, + //{ + // expr: "a_X or b_X{l=~'.*[0-4]$'}", + //}, + //{ + // expr: "a_X unless b_X{l=~'.*[0-4]$'}", + //}, + //{ + // expr: "a_X and b_X{l='notfound'}", + //}, + //// Simple functions. + //{ + // expr: "abs(a_X)", + //}, + //{ + // expr: "label_replace(a_X, 'l2', '$1', 'l', '(.*)')", + //}, + //{ + // expr: "label_join(a_X, 'l2', '-', 'l', 'l')", + //}, + // Simple aggregations. + { + expr: "sum(a_X)", + }, + //{ + // expr: "sum without (l)(h_X)", + //}, + //{ + // expr: "sum without (le)(h_X)", + //}, + { + expr: "sum by (l)(h_X)", + }, + { + expr: "sum by (le)(h_X)", + }, + //{ + // expr: "count_values('value', h_X)", + // steps: 100, + //}, + //{ + // expr: "topk(1, a_X)", + //}, + //{ + // expr: "topk(5, a_X)", + //}, + //// Combinations. + //{ + // expr: "rate(a_X[1m]) + rate(b_X[1m])", + //}, + { + expr: "sum by (le)(rate(h_X[1m]))", + }, + //{ + // expr: "sum without (l)(rate(a_X[1m]))", + //}, + //{ + // expr: "sum without (l)(rate(a_X[1m])) / sum without (l)(rate(b_X[1m]))", + //}, + //{ + // expr: "histogram_quantile(0.9, rate(h_X[5m]))", + //}, + //// Many-to-one join. + //{ + // expr: "a_X + on(l) group_right a_one", + //}, + //// Label compared to blank string. + //{ + // expr: "count({__name__!=\"\"})", + // steps: 1, + //}, + //{ + // expr: "count({__name__!=\"\",l=\"\"})", + // steps: 1, + //}, + //// Functions which have special handling inside eval() + //{ + // expr: "timestamp(a_X)", + //}, + } + + // X in an expr will be replaced by different metric sizes. + tmp := []benchCase{} + for _, c := range cases { + if !strings.Contains(c.expr, "X") { + tmp = append(tmp, c) + } else { + for _, count := range metricSizes { + tmp = append(tmp, benchCase{expr: strings.ReplaceAll(c.expr, "X", strconv.Itoa(count)), steps: c.steps}) + } + } + } + cases = tmp + + // No step will be replaced by cases with the standard step. 
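+ // That is: each case without an explicit step count is run as an instant query and as range queries with 1, 100 and 1000 steps.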
+ tmp = []benchCase{} + for _, c := range cases { + if c.steps != 0 { + tmp = append(tmp, c) + } else { + tmp = append(tmp, benchCase{expr: c.expr, steps: 0}) + tmp = append(tmp, benchCase{expr: c.expr, steps: 1}) + tmp = append(tmp, benchCase{expr: c.expr, steps: 100}) + tmp = append(tmp, benchCase{expr: c.expr, steps: 1000}) + } + } + return tmp +} + +// This is based on the benchmarks from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go. +func BenchmarkQuery(b *testing.B) { + db := newTestDB(b) + db.DisableCompactions() // Don't want auto-compaction disrupting timings. + opts := newTestEngineOpts() + + standardEngine := promql.NewEngine(opts) + streamingEngine, err := NewEngine(opts) + require.NoError(b, err) + + engines := map[string]promql.QueryEngine{ + "standard": standardEngine, + "streaming": streamingEngine, + } + + const interval = 10000 // 10s interval. + // A day of data plus 10k steps. + numIntervals := 8640 + 10000 + + metricSizes := []int{1, 100, 2000} + err = setupTestData(db, metricSizes, interval, numIntervals) + require.NoError(b, err) + cases := testCases(metricSizes) + ctx := context.Background() + + for _, c := range cases { + start := time.Unix(int64((numIntervals-c.steps)*10), 0) + end := time.Unix(int64(numIntervals*10), 0) + interval := time.Second * 10 + + b.Run(c.Name(), func(b *testing.B) { + // Check both engines produce the same result before running the benchmark. + standardResult, standardClose := c.Run(ctx, b, start, end, interval, standardEngine, db) + streamingResult, streamingClose := c.Run(ctx, b, start, end, interval, streamingEngine, db) + + requireEqualResults(b, standardResult, streamingResult) + + standardClose() + streamingClose() + + for name, engine := range engines { + b.Run(name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + res, cleanup := c.Run(ctx, b, start, end, interval, engine, db) + + if res != nil { + cleanup() + } + } + }) + } + }) + } +} + +func TestBenchmarkQueries(t *testing.T) { + db := newTestDB(t) + opts := newTestEngineOpts() + + standardEngine := promql.NewEngine(opts) + streamingEngine, err := NewEngine(opts) + require.NoError(t, err) + + const interval = 10000 // 10s interval. + // A day of data plus 10k steps. + numIntervals := 8640 + 10000 + + metricSizes := []int{1, 100} // Don't bother with 2000 series test here: these test cases take a while and they're most interesting as benchmarks, not correctness tests. + err = setupTestData(db, metricSizes, interval, numIntervals) + require.NoError(t, err) + cases := testCases(metricSizes) + + for _, c := range cases { + t.Run(c.Name(), func(t *testing.T) { + start := time.Unix(int64((numIntervals-c.steps)*10), 0) + end := time.Unix(int64(numIntervals*10), 0) + interval := time.Second * 10 + ctx := context.Background() + + standardResult, standardClose := c.Run(ctx, t, start, end, interval, standardEngine, db) + streamingResult, streamingClose := c.Run(ctx, t, start, end, interval, streamingEngine, db) + + requireEqualResults(t, standardResult, streamingResult) + + standardClose() + streamingClose() + }) + } +} + +// Why do we do this rather than require.Equal(t, expected, actual)? +// It's possible that floating point values are slightly different due to imprecision, but require.Equal doesn't allow us to set an allowable difference. +func requireEqualResults(t testing.TB, expected, actual *promql.Result) { + require.Equal(t, expected.Err, actual.Err) + + // Ignore warnings until they're supported by the streaming engine. 
+ // require.Equal(t, expected.Warnings, actual.Warnings) + + require.Equal(t, expected.Value.Type(), actual.Value.Type()) + + switch expected.Value.Type() { + case parser.ValueTypeVector: + expectedVector, err := expected.Vector() + require.NoError(t, err) + actualVector, err := actual.Vector() + require.NoError(t, err) + + // Instant queries don't guarantee any particular sort order, so sort results here so that we can easily compare them. + sortVector(expectedVector) + sortVector(actualVector) + + require.Len(t, actualVector, len(expectedVector)) + + for i, expectedSample := range expectedVector { + actualSample := actualVector[i] + + require.Equal(t, expectedSample.Metric, actualSample.Metric) + require.Equal(t, expectedSample.T, actualSample.T) + require.Equal(t, expectedSample.H, actualSample.H) + require.InEpsilon(t, expectedSample.F, actualSample.F, 1e-10) + } + case parser.ValueTypeMatrix: + expectedMatrix, err := expected.Matrix() + require.NoError(t, err) + actualMatrix, err := actual.Matrix() + require.NoError(t, err) + + require.Len(t, actualMatrix, len(expectedMatrix)) + + for i, expectedSeries := range expectedMatrix { + actualSeries := actualMatrix[i] + + require.Equal(t, expectedSeries.Metric, actualSeries.Metric) + require.Equal(t, expectedSeries.Histograms, actualSeries.Histograms) + + for j, expectedPoint := range expectedSeries.Floats { + actualPoint := actualSeries.Floats[j] + + require.Equal(t, expectedPoint.T, actualPoint.T) + require.InEpsilonf(t, expectedPoint.F, actualPoint.F, 1e-10, "expected series %v to have points %v, but result is %v", expectedSeries.Metric.String(), expectedSeries.Floats, actualSeries.Floats) + } + } + default: + require.Fail(t, "unexpected value type", "type: %v", expected.Value.Type()) + } +} + +func setupTestData(db *tsdb.DB, metricSizes []int, interval, numIntervals int) error { + totalMetrics := 0 + + for _, size := range metricSizes { + totalMetrics += 13 * size // 2 non-histogram metrics + 11 metrics for histogram buckets + } + + metrics := make([]labels.Labels, 0, totalMetrics) + + for _, size := range metricSizes { + aName := "a_" + strconv.Itoa(size) + bName := "b_" + strconv.Itoa(size) + histogramName := "h_" + strconv.Itoa(size) + + if size == 1 { + // We don't want a "l" label on metrics with one series (some test cases rely on this label not being present). 
+ metrics = append(metrics, labels.FromStrings("__name__", aName)) + metrics = append(metrics, labels.FromStrings("__name__", bName)) + for le := 0; le < 10; le++ { + metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", strconv.Itoa(le))) + } + metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", "+Inf")) + } else { + for i := 0; i < size; i++ { + metrics = append(metrics, labels.FromStrings("__name__", aName, "l", strconv.Itoa(i))) + metrics = append(metrics, labels.FromStrings("__name__", bName, "l", strconv.Itoa(i))) + for le := 0; le < 10; le++ { + metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", strconv.Itoa(le))) + } + metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", "+Inf")) + } + } + } + + refs := make([]storage.SeriesRef, len(metrics)) + + for s := 0; s < numIntervals; s++ { + a := db.Appender(context.Background()) + ts := int64(s * interval) + for i, metric := range metrics { + ref, _ := a.Append(refs[i], metric, ts, float64(s)+float64(i)/float64(len(metrics))) + refs[i] = ref + } + if err := a.Commit(); err != nil { + return err + } + } + + db.ForceHeadMMap() // Ensure we have at most one head chunk for every series. + return db.Compact(context.Background()) +} + +// This is based on https://github.com/prometheus/prometheus/blob/main/util/teststorage/storage.go, but with isolation disabled +// to improve test setup performance and mirror Mimir's default configuration. +func newTestDB(t testing.TB) *tsdb.DB { + dir := t.TempDir() + + // Tests just load data for a series sequentially. Thus we need a long appendable window. + opts := tsdb.DefaultOptions() + opts.MinBlockDuration = int64(24 * time.Hour / time.Millisecond) + opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) + opts.RetentionDuration = 0 + opts.EnableNativeHistograms = true + opts.IsolationDisabled = true + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) + require.NoError(t, err, "unexpected error while opening test storage") + + t.Cleanup(func() { + require.NoError(t, db.Close(), "unexpected error while closing test storage") + }) + + return db +} + +func sortVector(v promql.Vector) { + slices.SortFunc(v, func(a, b promql.Sample) int { + return labels.Compare(a.Metric, b.Metric) + }) +} diff --git a/pkg/streamingpromql/engine.go b/pkg/streamingpromql/engine.go new file mode 100644 index 0000000000..d3e7c455d7 --- /dev/null +++ b/pkg/streamingpromql/engine.go @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package streamingpromql + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/storage" +) + +const defaultLookbackDelta = 5 * time.Minute // This should be the same value as github.com/prometheus/prometheus/promql.defaultLookbackDelta. 
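+// (The value is duplicated here because the Prometheus constant is unexported.)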
+ +func NewEngine(opts promql.EngineOpts) (promql.QueryEngine, error) { + lookbackDelta := opts.LookbackDelta + if lookbackDelta == 0 { + lookbackDelta = defaultLookbackDelta + } + + if !opts.EnableAtModifier { + return nil, errors.New("disabling @ modifier not supported by streaming engine") + } + + if !opts.EnableNegativeOffset { + return nil, errors.New("disabling negative offsets not supported by streaming engine") + } + + if opts.EnablePerStepStats { + return nil, errors.New("enabling per-step stats not supported by streaming engine") + } + + return &Engine{ + lookbackDelta: lookbackDelta, + }, nil +} + +type Engine struct { + lookbackDelta time.Duration +} + +func (e *Engine) NewInstantQuery(_ context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, ts time.Time) (promql.Query, error) { + return newQuery(q, opts, qs, ts, ts, 0, e) +} + +func (e *Engine) NewRangeQuery(_ context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, interval time.Duration) (promql.Query, error) { + if interval <= 0 { + return nil, fmt.Errorf("%v is not a valid interval for a range query, must be greater than 0", interval) + } + + if end.Before(start) { + return nil, fmt.Errorf("range query time range is invalid: end time %v is before start time %v", end.Format(time.RFC3339), start.Format(time.RFC3339)) + } + + return newQuery(q, opts, qs, start, end, interval, e) +} diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go new file mode 100644 index 0000000000..654d3616c7 --- /dev/null +++ b/pkg/streamingpromql/engine_test.go @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package streamingpromql + +import ( + "context" + "io" + "io/fs" + "os" + "testing" + "time" + + "github.com/prometheus/prometheus/promql" + "github.com/stretchr/testify/require" +) + +func TestUnsupportedPromQLFeatures(t *testing.T) { + db := newTestDB(t) + opts := newTestEngineOpts() + engine, err := NewEngine(opts) + require.NoError(t, err) + ctx := context.Background() + + // The goal of this is not to list every conceivable expression that is unsupported, but to cover all the + // different cases and make sure we produce a reasonable error message when these cases are encountered. 
+ unsupportedExpressions := map[string]string{ + "a + b": "PromQL expression type *parser.BinaryExpr", + "1 + 2": "PromQL expression type *parser.BinaryExpr", + "metric{} + other_metric{}": "PromQL expression type *parser.BinaryExpr", + "1": "PromQL expression type *parser.NumberLiteral", + "metric{} offset 2h": "instant vector selector with 'offset'", + "avg(metric{})": "'avg' aggregation", + "sum without(l) (metric{})": "grouping with 'without'", + "rate(metric{}[5m] offset 2h)": "range vector selector with 'offset'", + "avg_over_time(metric{}[5m])": "'avg_over_time' function", + "-sum(metric{})": "PromQL expression type *parser.UnaryExpr", + } + + for expression, expectedError := range unsupportedExpressions { + t.Run(expression, func(t *testing.T) { + qry, err := engine.NewRangeQuery(ctx, db, nil, expression, time.Now().Add(-time.Hour), time.Now(), time.Minute) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotSupported) + require.EqualError(t, err, "not supported by streaming engine: "+expectedError) + require.Nil(t, qry) + + qry, err = engine.NewInstantQuery(ctx, db, nil, expression, time.Now()) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotSupported) + require.EqualError(t, err, "not supported by streaming engine: "+expectedError) + require.Nil(t, qry) + }) + } + + // These expressions are also unsupported, but are only valid as instant queries. + unsupportedInstantQueryExpressions := map[string]string{ + "'a'": "PromQL expression type *parser.StringLiteral", + "metric{}[5m]": "PromQL expression type *parser.MatrixSelector", + "metric{}[5m] offset 2h": "PromQL expression type *parser.MatrixSelector", + "metric{}[5m] @ 123": "PromQL expression type *parser.MatrixSelector", + "metric{}[5m] @ start()": "PromQL expression type *parser.MatrixSelector", + "metric{}[5m] @ end()": "PromQL expression type *parser.MatrixSelector", + "metric{}[5m:1m]": "PromQL expression type *parser.SubqueryExpr", + } + + for expression, expectedError := range unsupportedInstantQueryExpressions { + t.Run(expression, func(t *testing.T) { + qry, err := engine.NewInstantQuery(ctx, db, nil, expression, time.Now()) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotSupported) + require.EqualError(t, err, "not supported by streaming engine: "+expectedError) + require.Nil(t, qry) + }) + } +} + +func TestNewRangeQuery_InvalidQueryTime(t *testing.T) { + opts := newTestEngineOpts() + engine, err := NewEngine(opts) + require.NoError(t, err) + ctx := context.Background() + + _, err = engine.NewRangeQuery(ctx, nil, nil, "vector(0)", time.Now(), time.Now(), 0) + require.EqualError(t, err, "0s is not a valid interval for a range query, must be greater than 0") + + start := time.Date(2024, 3, 22, 3, 0, 0, 0, time.UTC) + _, err = engine.NewRangeQuery(ctx, nil, nil, "vector(0)", start, start.Add(-time.Hour), time.Second) + require.EqualError(t, err, "range query time range is invalid: end time 2024-03-22T02:00:00Z is before start time 2024-03-22T03:00:00Z") +} + +func TestNewRangeQuery_InvalidExpressionTypes(t *testing.T) { + opts := newTestEngineOpts() + engine, err := NewEngine(opts) + require.NoError(t, err) + ctx := context.Background() + + _, err = engine.NewRangeQuery(ctx, nil, nil, "metric[3m]", time.Now(), time.Now(), time.Second) + require.EqualError(t, err, "query expression produces a range vector, but expression for range queries must produce an instant vector or scalar") + + _, err = engine.NewRangeQuery(ctx, nil, nil, `"thing"`, time.Now(), time.Now(), time.Second) + require.EqualError(t, 
err, "query expression produces a string, but expression for range queries must produce an instant vector or scalar") +} + +// This test runs the test cases defined upstream in https://github.com/prometheus/prometheus/tree/main/promql/testdata and copied to testdata/upstream. +// Test cases that are not supported by the streaming engine are commented out (or, if the entire file is not supported, .disabled is appended to the file name). +// Once the streaming engine supports all PromQL features exercised by Prometheus' test cases, we can remove these files and instead call promql.RunBuiltinTests here instead. +func TestUpstreamTestCases(t *testing.T) { + opts := newTestEngineOpts() + engine, err := NewEngine(opts) + require.NoError(t, err) + + testdataFS := os.DirFS("./testdata") + testFiles, err := fs.Glob(testdataFS, "upstream/*.test") + require.NoError(t, err) + + for _, testFile := range testFiles { + t.Run(testFile, func(t *testing.T) { + f, err := testdataFS.Open(testFile) + require.NoError(t, err) + defer f.Close() + + testScript, err := io.ReadAll(f) + require.NoError(t, err) + + promql.RunTest(t, string(testScript), engine) + }) + } +} + +func TestOurTestCases(t *testing.T) { + opts := newTestEngineOpts() + streamingEngine, err := NewEngine(opts) + require.NoError(t, err) + + prometheusEngine := promql.NewEngine(opts) + + testdataFS := os.DirFS("./testdata") + testFiles, err := fs.Glob(testdataFS, "ours/*.test") + require.NoError(t, err) + + for _, testFile := range testFiles { + t.Run(testFile, func(t *testing.T) { + f, err := testdataFS.Open(testFile) + require.NoError(t, err) + defer f.Close() + + b, err := io.ReadAll(f) + require.NoError(t, err) + + testScript := string(b) + + t.Run("streaming engine", func(t *testing.T) { + promql.RunTest(t, testScript, streamingEngine) + }) + + // Run the tests against Prometheus' engine to ensure our test cases are valid. 
+ t.Run("Prometheus' engine", func(t *testing.T) { + promql.RunTest(t, testScript, prometheusEngine) + }) + }) + } +} + +func newTestEngineOpts() promql.EngineOpts { + return promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } +} diff --git a/pkg/streamingpromql/errors.go b/pkg/streamingpromql/errors.go new file mode 100644 index 0000000000..7e1255e643 --- /dev/null +++ b/pkg/streamingpromql/errors.go @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package streamingpromql + +import ( + "errors" + "fmt" +) + +var ErrNotSupported = errors.New("not supported by streaming engine") + +func NewNotSupportedError(detail string) error { + return fmt.Errorf("%w: %s", ErrNotSupported, detail) +} diff --git a/pkg/streamingpromql/operator/aggregation.go b/pkg/streamingpromql/operator/aggregation.go new file mode 100644 index 0000000000..0e676bbc77 --- /dev/null +++ b/pkg/streamingpromql/operator/aggregation.go @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package operator + +import ( + "context" + "errors" + "fmt" + "sort" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/util/zeropool" +) + +type Aggregation struct { + Inner InstantVectorOperator + Start time.Time + End time.Time + Interval time.Duration + Grouping []string + + remainingInnerSeriesToGroup []*group // One entry per series produced by Inner, value is the group for that series + remainingGroups []*group // One entry per group, in the order we want to return them +} + +type group struct { + labels labels.Labels + + // The number of input series that belong to this group that we haven't yet seen. + remainingSeriesCount uint + + // Sum and presence for each step. + sums []float64 + present []bool +} + +var _ InstantVectorOperator = &Aggregation{} + +var groupPool = zeropool.New(func() *group { + return &group{} +}) + +func (a *Aggregation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + // Fetch the source series + innerSeries, err := a.Inner.SeriesMetadata(ctx) + if err != nil { + return nil, err + } + + defer PutSeriesMetadataSlice(innerSeries) + + if len(innerSeries) == 0 { + // No input series == no output series. + return nil, nil + } + + // Determine the groups we'll return + groups := map[uint64]*group{} + buf := make([]byte, 0, 1024) + lb := labels.NewBuilder(labels.EmptyLabels()) + a.remainingInnerSeriesToGroup = make([]*group, 0, len(innerSeries)) + + for _, series := range innerSeries { + var groupingKey uint64 + groupingKey, buf = series.Labels.HashForLabels(buf, a.Grouping...) 
+ g, groupExists := groups[groupingKey] + + if !groupExists { + g = groupPool.Get() + g.labels = a.labelsForGroup(series.Labels, lb) + g.remainingSeriesCount = 0 + + groups[groupingKey] = g + } + + g.remainingSeriesCount++ + a.remainingInnerSeriesToGroup = append(a.remainingInnerSeriesToGroup, g) + } + + // Sort the list of series we'll return, and maintain the order of the corresponding groups at the same time + seriesMetadata := GetSeriesMetadataSlice(len(groups)) + a.remainingGroups = make([]*group, 0, len(groups)) + + for _, g := range groups { + seriesMetadata = append(seriesMetadata, SeriesMetadata{Labels: g.labels}) + a.remainingGroups = append(a.remainingGroups, g) + } + + sort.Sort(groupSorter{seriesMetadata, a.remainingGroups}) + + return seriesMetadata, nil +} + +func (a *Aggregation) labelsForGroup(m labels.Labels, lb *labels.Builder) labels.Labels { + if len(a.Grouping) == 0 { + return labels.EmptyLabels() + } + + lb.Reset(m) + lb.Keep(a.Grouping...) + return lb.Labels() +} + +func (a *Aggregation) Next(ctx context.Context) (InstantVectorSeriesData, error) { + if len(a.remainingGroups) == 0 { + // No more groups left. + return InstantVectorSeriesData{}, EOS + } + + start := timestamp.FromTime(a.Start) + end := timestamp.FromTime(a.End) + interval := a.Interval.Milliseconds() + steps := stepCount(start, end, interval) + + // Determine next group to return + thisGroup := a.remainingGroups[0] + a.remainingGroups = a.remainingGroups[1:] + + // Iterate through inner series until the desired group is complete + for thisGroup.remainingSeriesCount > 0 { + s, err := a.Inner.Next(ctx) + + if err != nil { + if errors.Is(err, EOS) { + return InstantVectorSeriesData{}, fmt.Errorf("exhausted series before all groups were completed: %w", err) + } + + return InstantVectorSeriesData{}, err + } + + thisSeriesGroup := a.remainingInnerSeriesToGroup[0] + a.remainingInnerSeriesToGroup = a.remainingInnerSeriesToGroup[1:] + + if thisSeriesGroup.sums == nil { + // First series for this group, populate it + thisSeriesGroup.sums = GetFloatSlice(steps)[:steps] + thisSeriesGroup.present = GetBoolSlice(steps)[:steps] + } + + for _, p := range s.Floats { + idx := (p.T - start) / interval + thisSeriesGroup.sums[idx] += p.F + thisSeriesGroup.present[idx] = true + } + + PutFPointSlice(s.Floats) + thisSeriesGroup.remainingSeriesCount-- + } + + // Construct the group and return it + pointCount := 0 + for _, p := range thisGroup.present { + if p { + pointCount++ + } + } + + points := GetFPointSlice(pointCount) + + for i, havePoint := range thisGroup.present { + if havePoint { + t := start + int64(i)*interval + points = append(points, promql.FPoint{T: t, F: thisGroup.sums[i]}) + } + } + + PutFloatSlice(thisGroup.sums) + PutBoolSlice(thisGroup.present) + + thisGroup.sums = nil + thisGroup.present = nil + groupPool.Put(thisGroup) + + return InstantVectorSeriesData{Floats: points}, nil +} + +func (a *Aggregation) Close() { + a.Inner.Close() +} + +type groupSorter struct { + metadata []SeriesMetadata + groups []*group +} + +func (g groupSorter) Len() int { + return len(g.metadata) +} + +func (g groupSorter) Less(i, j int) bool { + return labels.Compare(g.metadata[i].Labels, g.metadata[j].Labels) < 0 +} + +func (g groupSorter) Swap(i, j int) { + g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] + g.groups[i], g.groups[j] = g.groups[j], g.groups[i] +} diff --git a/pkg/streamingpromql/operator/instant_vector_selector.go b/pkg/streamingpromql/operator/instant_vector_selector.go new file mode 100644 index 
0000000000..e72b955b72 --- /dev/null +++ b/pkg/streamingpromql/operator/instant_vector_selector.go @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package operator + +import ( + "context" + "errors" + "fmt" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" +) + +type InstantVectorSelector struct { + Selector *Selector + + numSteps int + + chunkIterator chunkenc.Iterator + memoizedIterator *storage.MemoizedSeriesIterator +} + +var _ InstantVectorOperator = &InstantVectorSelector{} + +func (v *InstantVectorSelector) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + // Compute value we need on every call to Next() once, here. + v.numSteps = stepCount(v.Selector.Start, v.Selector.End, v.Selector.Interval) + + return v.Selector.SeriesMetadata(ctx) +} + +func (v *InstantVectorSelector) Next(_ context.Context) (InstantVectorSeriesData, error) { + if v.memoizedIterator == nil { + v.memoizedIterator = storage.NewMemoizedEmptyIterator(v.Selector.LookbackDelta.Milliseconds()) + } + + var err error + v.chunkIterator, err = v.Selector.Next(v.chunkIterator) + if err != nil { + return InstantVectorSeriesData{}, err + } + + v.memoizedIterator.Reset(v.chunkIterator) + + data := InstantVectorSeriesData{ + Floats: GetFPointSlice(v.numSteps), // TODO: only allocate this if we have any floats (once we support native histograms) + } + + for stepT := v.Selector.Start; stepT <= v.Selector.End; stepT += v.Selector.Interval { + var t int64 + var val float64 + var h *histogram.FloatHistogram + + ts := stepT + if v.Selector.Timestamp != nil { + ts = *v.Selector.Timestamp + } + + valueType := v.memoizedIterator.Seek(ts) + + switch valueType { + case chunkenc.ValNone: + if v.memoizedIterator.Err() != nil { + return InstantVectorSeriesData{}, v.memoizedIterator.Err() + } + case chunkenc.ValFloat: + t, val = v.memoizedIterator.At() + default: + return InstantVectorSeriesData{}, fmt.Errorf("streaming PromQL engine: unknown value type %s", valueType.String()) + } + + if valueType == chunkenc.ValNone || t > ts { + var ok bool + t, val, h, ok = v.memoizedIterator.PeekPrev() + if h != nil { + return InstantVectorSeriesData{}, errors.New("streaming PromQL engine doesn't support histograms yet") + } + if !ok || t < ts-v.Selector.LookbackDelta.Milliseconds() { + continue + } + } + if value.IsStaleNaN(val) || (h != nil && value.IsStaleNaN(h.Sum)) { + continue + } + + data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: val}) + } + + if v.memoizedIterator.Err() != nil { + return InstantVectorSeriesData{}, v.memoizedIterator.Err() + } + + return data, nil +} + +func (v *InstantVectorSelector) Close() { + if v.Selector != nil { + v.Selector.Close() + } +} diff --git a/pkg/streamingpromql/operator/operator.go b/pkg/streamingpromql/operator/operator.go new file mode 100644 index 0000000000..a778cf2034 --- /dev/null +++ b/pkg/streamingpromql/operator/operator.go @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +import ( + "context" + "errors" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" +) + +// InstantVectorOperator 
represents all operators that produce instant vectors.
+type InstantVectorOperator interface {
+	// SeriesMetadata returns a list of all series that will be returned by this operator.
+	// The returned []SeriesMetadata can be modified by the caller or returned to a pool.
+	// SeriesMetadata may return series in any order, but the same order must be used by both SeriesMetadata and Next.
+	// SeriesMetadata should be called no more than once.
+	SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error)
+
+	// Next returns the next series from this operator, or EOS if there are no more series.
+	// SeriesMetadata must be called exactly once before calling Next.
+	// The returned InstantVectorSeriesData can be modified by the caller or returned to a pool.
+	// The returned InstantVectorSeriesData can contain no points.
+	Next(ctx context.Context) (InstantVectorSeriesData, error)
+
+	// Close frees all resources associated with this operator.
+	// Calling SeriesMetadata or Next after calling Close may result in unpredictable behaviour, corruption or crashes.
+	Close()
+}
+
+var EOS = errors.New("operator stream exhausted") //nolint:revive
+
+type SeriesMetadata struct {
+	Labels labels.Labels
+}
+
+type InstantVectorSeriesData struct {
+	Floats     []promql.FPoint
+	Histograms []promql.HPoint
+}
diff --git a/pkg/streamingpromql/operator/pool.go b/pkg/streamingpromql/operator/pool.go
new file mode 100644
index 0000000000..76c65cf904
--- /dev/null
+++ b/pkg/streamingpromql/operator/pool.go
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: AGPL-3.0-only
+
+package operator
+
+import (
+	"github.com/prometheus/prometheus/promql"
+
+	"github.com/grafana/mimir/pkg/util/pool"
+)
+
+const (
+	maxExpectedPointsPerSeries = 100_000 // There's not too much science behind this number: 100000 points allows for a point per minute for just under 70 days.
+
+	maxExpectedSeriesPerResult = 10_000_000 // Likewise, there's not too much science behind this number: this is based on examining the largest queries seen at Grafana Labs.
+)
+
+var (
+	fPointSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(size int) []promql.FPoint {
+		return make([]promql.FPoint, 0, size)
+	})
+
+	matrixPool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, 10, func(size int) promql.Matrix {
+		return make(promql.Matrix, 0, size)
+	})
+
+	vectorPool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(size int) promql.Vector {
+		return make(promql.Vector, 0, size)
+	})
+
+	seriesMetadataSlicePool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, 10, func(size int) []SeriesMetadata {
+		return make([]SeriesMetadata, 0, size)
+	})
+
+	floatSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(_ int) []float64 {
+		// Don't allocate a new slice now - we'll allocate one in GetFloatSlice if we need it, so we can differentiate between reused and new slices.
+		return nil
+	})
+	boolSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(_ int) []bool {
+		// Don't allocate a new slice now - we'll allocate one in GetBoolSlice if we need it, so we can differentiate between reused and new slices.
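+		// Slices returned to these pools keep their old contents, so GetFloatSlice and GetBoolSlice
+		// below zero reused slices before handing them out.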
+ return nil + }) +) + +func GetFPointSlice(size int) []promql.FPoint { + return fPointSlicePool.Get(size) +} + +func PutFPointSlice(s []promql.FPoint) { + fPointSlicePool.Put(s) +} + +func GetMatrix(size int) promql.Matrix { + return matrixPool.Get(size) +} + +func PutMatrix(m promql.Matrix) { + matrixPool.Put(m) +} + +func GetVector(size int) promql.Vector { + return vectorPool.Get(size) +} + +func PutVector(v promql.Vector) { + vectorPool.Put(v) +} + +func GetSeriesMetadataSlice(size int) []SeriesMetadata { + return seriesMetadataSlicePool.Get(size) +} + +func PutSeriesMetadataSlice(s []SeriesMetadata) { + seriesMetadataSlicePool.Put(s) +} + +func GetFloatSlice(size int) []float64 { + s := floatSlicePool.Get(size) + if s != nil { + return zeroFloatSlice(s, size) + } + + return make([]float64, 0, size) +} + +func PutFloatSlice(s []float64) { + floatSlicePool.Put(s) +} + +func GetBoolSlice(size int) []bool { + s := boolSlicePool.Get(size) + + if s != nil { + return zeroBoolSlice(s, size) + } + + return make([]bool, 0, size) +} + +func PutBoolSlice(s []bool) { + boolSlicePool.Put(s) +} + +func zeroFloatSlice(s []float64, size int) []float64 { + s = s[:size] + + for i := range s { + s[i] = 0 + } + + return s[:0] +} + +func zeroBoolSlice(s []bool, size int) []bool { + s = s[:size] + + for i := range s { + s[i] = false + } + + return s[:0] +} diff --git a/pkg/streamingpromql/operator/range_vector_selector_with_transformation.go b/pkg/streamingpromql/operator/range_vector_selector_with_transformation.go new file mode 100644 index 0000000000..98ca848f09 --- /dev/null +++ b/pkg/streamingpromql/operator/range_vector_selector_with_transformation.go @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/functions.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package operator + +import ( + "context" + "fmt" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/tsdb/chunkenc" +) + +// RangeVectorSelectorWithTransformation performs a rate calculation over a range vector selector. +// +// This will one day be split into two operators: the rate calculation operator and a range vector selector operator. +type RangeVectorSelectorWithTransformation struct { + Selector *Selector + + rangeMilliseconds int64 + numSteps int + + chunkIterator chunkenc.Iterator + buffer *RingBuffer +} + +var _ InstantVectorOperator = &RangeVectorSelectorWithTransformation{} + +func (m *RangeVectorSelectorWithTransformation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + // Compute values we need on every call to Next() once, here. 
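+	// Note that stepCount counts both endpoints: for example, start=0, end=4m and interval=1m give 5 steps.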
+ m.rangeMilliseconds = m.Selector.Range.Milliseconds() + m.numSteps = stepCount(m.Selector.Start, m.Selector.End, m.Selector.Interval) + + metadata, err := m.Selector.SeriesMetadata(ctx) + if err != nil { + return nil, err + } + + lb := labels.NewBuilder(labels.EmptyLabels()) + for i := range metadata { + metadata[i].Labels = dropMetricName(metadata[i].Labels, lb) + } + + return metadata, nil +} + +func dropMetricName(l labels.Labels, lb *labels.Builder) labels.Labels { + lb.Reset(l) + lb.Del(labels.MetricName) + return lb.Labels() +} + +func (m *RangeVectorSelectorWithTransformation) Next(_ context.Context) (InstantVectorSeriesData, error) { + if m.buffer == nil { + m.buffer = &RingBuffer{} + } + + var err error + m.chunkIterator, err = m.Selector.Next(m.chunkIterator) + if err != nil { + return InstantVectorSeriesData{}, err + } + + m.buffer.Reset() + + data := InstantVectorSeriesData{ + Floats: GetFPointSlice(m.numSteps), // TODO: only allocate this if we have any floats (once we support native histograms) + } + + // TODO: handle native histograms + for stepT := m.Selector.Start; stepT <= m.Selector.End; stepT += m.Selector.Interval { + rangeEnd := stepT + + if m.Selector.Timestamp != nil { + rangeEnd = *m.Selector.Timestamp + } + + rangeStart := rangeEnd - m.rangeMilliseconds + m.buffer.DiscardPointsBefore(rangeStart) + + if err := m.fillBuffer(rangeStart, rangeEnd); err != nil { + return InstantVectorSeriesData{}, err + } + + head, tail := m.buffer.Points() + count := len(head) + len(tail) + + if count < 2 { + // Not enough points, skip. + continue + } + + firstPoint := m.buffer.First() + lastPoint := m.buffer.Last() + delta := lastPoint.F - firstPoint.F + previousValue := firstPoint.F + + accumulate := func(points []promql.FPoint) { + for _, p := range points { + if p.T > rangeEnd { // The buffer is already guaranteed to only contain points >= rangeStart. + return + } + + if p.F < previousValue { + // Counter reset. + delta += previousValue + } + + previousValue = p.F + } + } + + accumulate(head) + accumulate(tail) + + val := m.calculateRate(rangeStart, rangeEnd, firstPoint, lastPoint, delta, count) + + data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: val}) + } + + return data, nil +} + +func (m *RangeVectorSelectorWithTransformation) fillBuffer(rangeStart, rangeEnd int64) error { + // Keep filling the buffer until we reach the end of the range or the end of the iterator. + for { + valueType := m.chunkIterator.Next() + + switch valueType { + case chunkenc.ValNone: + // No more data. We are done. + return m.chunkIterator.Err() + case chunkenc.ValFloat: + t, f := m.chunkIterator.At() + if value.IsStaleNaN(f) || t < rangeStart { + continue + } + + m.buffer.Append(promql.FPoint{T: t, F: f}) + + if t >= rangeEnd { + return nil + } + default: + // TODO: handle native histograms + return fmt.Errorf("unknown value type %s", valueType.String()) + } + } +} + +// This is based on extrapolatedRate from promql/functions.go. +// https://github.com/prometheus/prometheus/pull/13725 has a good explanation of the intended behaviour here. 
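+// As a rough worked example: for a 60s range starting at T=0 with samples (15s, 0), (30s, 1) and (45s, 2),
+// delta is 2, sampledInterval is 30s and averageDurationBetweenSamples is 15s, so the extrapolation
+// threshold is 16.5s. The start is not extrapolated at all (durationToZero is 0 because the first sample
+// is 0), the end is extrapolated the full 15s to the window boundary, and the result is
+// 2 * ((30+0+15)/30) / 60 = 0.05 per second.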
+func (m *RangeVectorSelectorWithTransformation) calculateRate(rangeStart, rangeEnd int64, firstPoint, lastPoint promql.FPoint, delta float64, count int) float64 {
+	durationToStart := float64(firstPoint.T-rangeStart) / 1000
+	durationToEnd := float64(rangeEnd-lastPoint.T) / 1000
+
+	sampledInterval := float64(lastPoint.T-firstPoint.T) / 1000
+	averageDurationBetweenSamples := sampledInterval / float64(count-1)
+
+	extrapolationThreshold := averageDurationBetweenSamples * 1.1
+	extrapolateToInterval := sampledInterval
+
+	if durationToStart >= extrapolationThreshold {
+		durationToStart = averageDurationBetweenSamples / 2
+	}
+
+	if delta > 0 && firstPoint.F >= 0 {
+		durationToZero := sampledInterval * (firstPoint.F / delta)
+		if durationToZero < durationToStart {
+			durationToStart = durationToZero
+		}
+	}
+
+	extrapolateToInterval += durationToStart
+
+	if durationToEnd >= extrapolationThreshold {
+		durationToEnd = averageDurationBetweenSamples / 2
+	}
+
+	extrapolateToInterval += durationToEnd
+
+	factor := extrapolateToInterval / sampledInterval
+	factor /= m.Selector.Range.Seconds()
+	return delta * factor
+}
+
+func (m *RangeVectorSelectorWithTransformation) Close() {
+	if m.Selector != nil {
+		m.Selector.Close()
+	}
+
+	if m.buffer != nil {
+		m.buffer.Close()
+	}
+}
diff --git a/pkg/streamingpromql/operator/ring_buffer.go b/pkg/streamingpromql/operator/ring_buffer.go
new file mode 100644
index 0000000000..b5dc76d957
--- /dev/null
+++ b/pkg/streamingpromql/operator/ring_buffer.go
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: AGPL-3.0-only
+
+package operator
+
+import "github.com/prometheus/prometheus/promql"
+
+type RingBuffer struct {
+	points     []promql.FPoint
+	firstIndex int // Index into 'points' of first point in this buffer.
+	size       int // Number of points in this buffer.
+}
+
+// DiscardPointsBefore discards all points in this buffer with timestamp less than t.
+func (b *RingBuffer) DiscardPointsBefore(t int64) {
+	for b.size > 0 && b.points[b.firstIndex].T < t {
+		b.firstIndex++
+		b.size--
+
+		if b.firstIndex >= len(b.points) {
+			b.firstIndex = 0
+		}
+	}
+
+	if b.size == 0 {
+		b.firstIndex = 0
+	}
+}
+
+// Points returns slices of the points in this buffer.
+// Either or both slices could be empty.
+// Callers must not modify the values in the returned slices.
+//
+// FIXME: the fact we have to expose this is a bit gross, but the overhead of calling a function with ForEach is terrible.
+// Perhaps we can use range-over function iterators (https://go.dev/wiki/RangefuncExperiment) once they're no longer experimental?
+func (b *RingBuffer) Points() ([]promql.FPoint, []promql.FPoint) {
+	endOfTailSegment := b.firstIndex + b.size
+
+	if endOfTailSegment > len(b.points) {
+		// Need to wrap around.
+		endOfHeadSegment := endOfTailSegment % len(b.points)
+		endOfTailSegment = len(b.points)
+		return b.points[b.firstIndex:endOfTailSegment], b.points[0:endOfHeadSegment]
+	}
+
+	return b.points[b.firstIndex:endOfTailSegment], nil
+}
+
+// ForEach calls f for each point in this buffer.
+func (b *RingBuffer) ForEach(f func(p promql.FPoint)) {
+	if b.size == 0 {
+		return
+	}
+
+	lastIndexPlusOne := b.firstIndex + b.size
+
+	if lastIndexPlusOne > len(b.points) {
+		lastIndexPlusOne = len(b.points)
+	}
+
+	for i := b.firstIndex; i < lastIndexPlusOne; i++ {
+		f(b.points[i])
+	}
+
+	if b.firstIndex+b.size < len(b.points) {
+		// Don't need to wrap around to start of buffer.
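+		// (If firstIndex+size equals len(points) exactly, the modulo in the loop bound below is 0,
+		// so the wrap-around loop does nothing.)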
+ return + } + + for i := 0; i < (b.firstIndex+b.size)%len(b.points); i++ { + f(b.points[i]) + } +} + +// Append adds p to this buffer, expanding it if required. +// If this buffer is non-empty, p.T must be greater than or equal to the +// timestamp of the last point in the buffer. +func (b *RingBuffer) Append(p promql.FPoint) { + if b.size == len(b.points) { + // Create a new slice, copy the elements from the current slice. + newSize := b.size * 2 + if newSize == 0 { + newSize = 2 + } + + newSlice := GetFPointSlice(newSize) + newSlice = newSlice[:cap(newSlice)] + pointsAtEnd := b.size - b.firstIndex + copy(newSlice, b.points[b.firstIndex:]) + copy(newSlice[pointsAtEnd:], b.points[:b.firstIndex]) + + PutFPointSlice(b.points) + b.points = newSlice + b.firstIndex = 0 + } + + nextIndex := (b.firstIndex + b.size) % len(b.points) + b.points[nextIndex] = p + b.size++ +} + +// Reset clears the contents of this buffer. +func (b *RingBuffer) Reset() { + b.firstIndex = 0 + b.size = 0 +} + +// Close releases any resources associated with this buffer. +func (b *RingBuffer) Close() { + b.Reset() + PutFPointSlice(b.points) + b.points = nil +} + +// First returns the first point in this buffer. +// It panics if the buffer is empty. +func (b *RingBuffer) First() promql.FPoint { + if b.size == 0 { + panic("Can't get first element of empty buffer") + } + + return b.points[b.firstIndex] +} + +// Last returns the last point in this buffer. +// It panics if the buffer is empty. +func (b *RingBuffer) Last() promql.FPoint { + if b.size == 0 { + panic("Can't get last element of empty buffer") + } + + return b.points[(b.firstIndex+b.size-1)%len(b.points)] +} diff --git a/pkg/streamingpromql/operator/ring_buffer_test.go b/pkg/streamingpromql/operator/ring_buffer_test.go new file mode 100644 index 0000000000..361db49429 --- /dev/null +++ b/pkg/streamingpromql/operator/ring_buffer_test.go @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +import ( + "testing" + + "github.com/prometheus/prometheus/promql" + "github.com/stretchr/testify/require" +) + +func TestRingBuffer(t *testing.T) { + buf := &RingBuffer{} + shouldHaveNoPoints(t, buf) + + buf.DiscardPointsBefore(1) // Should handle empty buffer. + shouldHaveNoPoints(t, buf) + + buf.Append(promql.FPoint{T: 1, F: 100}) + shouldHavePoints(t, buf, promql.FPoint{T: 1, F: 100}) + + buf.Append(promql.FPoint{T: 2, F: 200}) + shouldHavePoints(t, buf, promql.FPoint{T: 1, F: 100}, promql.FPoint{T: 2, F: 200}) + + buf.DiscardPointsBefore(1) + shouldHavePoints(t, buf, promql.FPoint{T: 1, F: 100}, promql.FPoint{T: 2, F: 200}) // No change. + + buf.DiscardPointsBefore(2) + shouldHavePoints(t, buf, promql.FPoint{T: 2, F: 200}) + + buf.Append(promql.FPoint{T: 3, F: 300}) + shouldHavePoints(t, buf, promql.FPoint{T: 2, F: 200}, promql.FPoint{T: 3, F: 300}) + + buf.DiscardPointsBefore(4) + shouldHaveNoPoints(t, buf) + + buf.Append(promql.FPoint{T: 4, F: 400}) + buf.Append(promql.FPoint{T: 5, F: 500}) + shouldHavePoints(t, buf, promql.FPoint{T: 4, F: 400}, promql.FPoint{T: 5, F: 500}) + + // Trigger expansion of buffer (we resize in powers of two, but the underlying slice comes from a pool that uses a factor of 10). + // Ideally we wouldn't reach into the internals here, but this helps ensure the test is testing the correct scenario. 
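+	// (The first Append requested a slice of capacity 2, but the pool's bucketing factor of 10 rounds this up to 10.)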
+ require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") + buf.Append(promql.FPoint{T: 6, F: 600}) + buf.Append(promql.FPoint{T: 7, F: 700}) + buf.Append(promql.FPoint{T: 8, F: 800}) + buf.Append(promql.FPoint{T: 9, F: 900}) + buf.Append(promql.FPoint{T: 10, F: 1000}) + buf.Append(promql.FPoint{T: 11, F: 1100}) + buf.Append(promql.FPoint{T: 12, F: 1200}) + buf.Append(promql.FPoint{T: 13, F: 1300}) + buf.Append(promql.FPoint{T: 14, F: 1400}) + require.Greater(t, len(buf.points), 10, "expected underlying slice to be expanded, if this assertion fails, the test setup is not as expected") + + shouldHavePoints(t, + buf, + promql.FPoint{T: 4, F: 400}, + promql.FPoint{T: 5, F: 500}, + promql.FPoint{T: 6, F: 600}, + promql.FPoint{T: 7, F: 700}, + promql.FPoint{T: 8, F: 800}, + promql.FPoint{T: 9, F: 900}, + promql.FPoint{T: 10, F: 1000}, + promql.FPoint{T: 11, F: 1100}, + promql.FPoint{T: 12, F: 1200}, + promql.FPoint{T: 13, F: 1300}, + promql.FPoint{T: 14, F: 1400}, + ) + + buf.Reset() + shouldHaveNoPoints(t, buf) + + buf.Append(promql.FPoint{T: 9, F: 900}) + shouldHavePoints(t, buf, promql.FPoint{T: 9, F: 900}) +} + +func TestRingBuffer_DiscardPointsBefore_ThroughWrapAround(t *testing.T) { + // Set up the buffer so that the first point is part-way through the underlying slice. + // We resize in powers of two, but the underlying slice comes from a pool that uses a factor of 10. + buf := &RingBuffer{} + buf.Append(promql.FPoint{T: 1, F: 100}) + buf.Append(promql.FPoint{T: 2, F: 200}) + buf.Append(promql.FPoint{T: 3, F: 300}) + buf.Append(promql.FPoint{T: 4, F: 400}) + buf.Append(promql.FPoint{T: 5, F: 500}) + buf.Append(promql.FPoint{T: 6, F: 600}) + buf.Append(promql.FPoint{T: 7, F: 700}) + buf.Append(promql.FPoint{T: 8, F: 800}) + buf.Append(promql.FPoint{T: 9, F: 900}) + buf.Append(promql.FPoint{T: 10, F: 1000}) + + // Ideally we wouldn't reach into the internals here, but this helps ensure the test is testing the correct scenario. + require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") + buf.DiscardPointsBefore(8) + buf.Append(promql.FPoint{T: 11, F: 1100}) + buf.Append(promql.FPoint{T: 12, F: 1200}) + buf.Append(promql.FPoint{T: 13, F: 1300}) + + // Should not have expanded slice. + require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") + + // Discard before end of underlying slice. + buf.DiscardPointsBefore(9) + shouldHavePoints(t, + buf, + promql.FPoint{T: 9, F: 900}, + promql.FPoint{T: 10, F: 1000}, + promql.FPoint{T: 11, F: 1100}, + promql.FPoint{T: 12, F: 1200}, + promql.FPoint{T: 13, F: 1300}, + ) + + require.Equal(t, 8, buf.firstIndex, "expected first point to be in middle of underlying slice, if this assertion fails, the test setup is not as expected") + + // Discard after wraparound. + buf.DiscardPointsBefore(12) + shouldHavePoints(t, + buf, + promql.FPoint{T: 12, F: 1200}, + promql.FPoint{T: 13, F: 1300}, + ) +} + +func shouldHaveNoPoints(t *testing.T, buf *RingBuffer) { + shouldHavePoints( + t, + buf, + /* nothing */ + ) +} + +func shouldHavePoints(t *testing.T, buf *RingBuffer, expected ...promql.FPoint) { + var actual []promql.FPoint + + buf.ForEach(func(p promql.FPoint) { + actual = append(actual, p) + }) + + require.Equal(t, expected, actual) + + head, tail := buf.Points() + actual = append(head, tail...) 
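+	// (Safe even though head is a view over the buffer: when the buffer has wrapped, head ends at the end of
+	// the underlying slice, so its capacity equals its length and append allocates a fresh slice.)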
+ + if len(actual) == 0 { + actual = nil // expected will be nil when it's empty, but appending two empty slices returns a non-nil slice. + } + + require.Equal(t, expected, actual) + + if len(actual) == 0 { + return + } + + require.Equal(t, expected[0], buf.First()) + require.Equal(t, expected[len(expected)-1], buf.Last()) +} diff --git a/pkg/streamingpromql/operator/selector.go b/pkg/streamingpromql/operator/selector.go new file mode 100644 index 0000000000..d1d96313a8 --- /dev/null +++ b/pkg/streamingpromql/operator/selector.go @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" +) + +type Selector struct { + Queryable storage.Queryable + Start int64 // Milliseconds since Unix epoch + End int64 // Milliseconds since Unix epoch + Timestamp *int64 + Interval int64 // In milliseconds + Matchers []*labels.Matcher + + // Set for instant vector selectors, otherwise 0. + LookbackDelta time.Duration + + // Set for range vector selectors, otherwise 0. + Range time.Duration + + querier storage.Querier + currentSeriesBatch *seriesBatch + seriesIndexInCurrentBatch int +} + +// There's not too much science behind this number: this is based on the batch size used for chunks streaming. +const seriesBatchSize = 256 + +var seriesBatchPool = sync.Pool{New: func() any { + return &seriesBatch{ + series: make([]storage.Series, 0, seriesBatchSize), + next: nil, + } +}} + +func (s *Selector) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + if s.currentSeriesBatch != nil { + return nil, errors.New("should not call Selector.SeriesMetadata() multiple times") + } + + if s.LookbackDelta != 0 && s.Range != 0 { + return nil, errors.New("invalid Selector configuration: both LookbackDelta and Range are non-zero") + } + + startTimestamp := s.Start + endTimestamp := s.End + + if s.Timestamp != nil { + startTimestamp = *s.Timestamp + endTimestamp = *s.Timestamp + } + + rangeMilliseconds := s.Range.Milliseconds() + start := startTimestamp - s.LookbackDelta.Milliseconds() - rangeMilliseconds + + hints := &storage.SelectHints{ + Start: start, + End: endTimestamp, + Step: s.Interval, + Range: rangeMilliseconds, + + // Mimir doesn't use Grouping or By, so there's no need to include them here. + // + // Mimir does use Func to determine if it's a /series request, but this doesn't go + // through the PromQL engine, so we don't need to include it here either. + // + // Mimir does use ShardCount, ShardIndex and DisableTrimming, but not at this level: + // ShardCount and ShardIndex are set by ingesters and store-gateways when a sharding + // label matcher is present, and ingesters set DisableTrimming to true. + } + + var err error + s.querier, err = s.Queryable.Querier(start, endTimestamp) + if err != nil { + return nil, err + } + + ss := s.querier.Select(ctx, true, hints, s.Matchers...) 
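+	// Gather the series into fixed-size batches so that Next() can return each batch to the pool
+	// as soon as all of its series have been consumed.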
+ s.currentSeriesBatch = seriesBatchPool.Get().(*seriesBatch) + incompleteBatch := s.currentSeriesBatch + totalSeries := 0 + + for ss.Next() { + if len(incompleteBatch.series) == cap(incompleteBatch.series) { + nextBatch := seriesBatchPool.Get().(*seriesBatch) + incompleteBatch.next = nextBatch + incompleteBatch = nextBatch + } + + incompleteBatch.series = append(incompleteBatch.series, ss.At()) + totalSeries++ + } + + metadata := GetSeriesMetadataSlice(totalSeries) + batch := s.currentSeriesBatch + for batch != nil { + for _, s := range batch.series { + metadata = append(metadata, SeriesMetadata{Labels: s.Labels()}) + } + + batch = batch.next + } + + return metadata, ss.Err() +} + +func (s *Selector) Next(existing chunkenc.Iterator) (chunkenc.Iterator, error) { + if s.currentSeriesBatch == nil || len(s.currentSeriesBatch.series) == 0 { + return nil, EOS + } + + it := s.currentSeriesBatch.series[s.seriesIndexInCurrentBatch].Iterator(existing) + s.seriesIndexInCurrentBatch++ + + if s.seriesIndexInCurrentBatch == len(s.currentSeriesBatch.series) { + b := s.currentSeriesBatch + s.currentSeriesBatch = s.currentSeriesBatch.next + putSeriesBatch(b) + s.seriesIndexInCurrentBatch = 0 + } + + return it, nil +} + +func (s *Selector) Close() { + for s.currentSeriesBatch != nil { + b := s.currentSeriesBatch + s.currentSeriesBatch = s.currentSeriesBatch.next + putSeriesBatch(b) + } + + if s.querier != nil { + _ = s.querier.Close() + s.querier = nil + } +} + +type seriesBatch struct { + series []storage.Series + next *seriesBatch +} + +func putSeriesBatch(b *seriesBatch) { + b.series = b.series[:0] + b.next = nil + seriesBatchPool.Put(b) +} diff --git a/pkg/streamingpromql/operator/time.go b/pkg/streamingpromql/operator/time.go new file mode 100644 index 0000000000..bc52c1cd27 --- /dev/null +++ b/pkg/streamingpromql/operator/time.go @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +func stepCount(start, end, interval int64) int { + return int((end-start)/interval) + 1 +} diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go new file mode 100644 index 0000000000..a5ba33b467 --- /dev/null +++ b/pkg/streamingpromql/query.go @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package streamingpromql + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/stats" + "golang.org/x/exp/slices" + + "github.com/grafana/mimir/pkg/streamingpromql/operator" +) + +type Query struct { + queryable storage.Queryable + opts promql.QueryOpts + statement *parser.EvalStmt + root operator.InstantVectorOperator + engine *Engine + qs string + + result *promql.Result +} + +func newQuery(queryable storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, interval time.Duration, engine *Engine) (*Query, error) { + if opts == nil { + opts = promql.NewPrometheusQueryOpts(false, 0) + } + + expr, err := parser.ParseExpr(qs) + if err != nil { + return nil, err + } + + expr = promql.PreprocessExpr(expr, start, end) + + q := &Query{ + queryable: queryable, + opts: opts, + engine: engine, + qs: qs, + statement: &parser.EvalStmt{ + 
Expr: expr, + Start: start, + End: end, + Interval: interval, + LookbackDelta: opts.LookbackDelta(), + }, + } + + if !q.IsInstant() { + if expr.Type() != parser.ValueTypeVector && expr.Type() != parser.ValueTypeScalar { + return nil, fmt.Errorf("query expression produces a %s, but expression for range queries must produce an instant vector or scalar", parser.DocumentedType(expr.Type())) + } + } + + q.root, err = q.convertToOperator(expr) + if err != nil { + return nil, err + } + + return q, nil +} + +func (q *Query) convertToOperator(expr parser.Expr) (operator.InstantVectorOperator, error) { + interval := q.statement.Interval + + if q.IsInstant() { + interval = time.Millisecond + } + + switch e := expr.(type) { + case *parser.VectorSelector: + lookbackDelta := q.opts.LookbackDelta() + if lookbackDelta == 0 { + lookbackDelta = q.engine.lookbackDelta + } + + if e.OriginalOffset != 0 || e.Offset != 0 { + return nil, NewNotSupportedError("instant vector selector with 'offset'") + } + + return &operator.InstantVectorSelector{ + Selector: &operator.Selector{ + Queryable: q.queryable, + Start: timestamp.FromTime(q.statement.Start), + End: timestamp.FromTime(q.statement.End), + Timestamp: e.Timestamp, + Interval: interval.Milliseconds(), + LookbackDelta: lookbackDelta, + Matchers: e.LabelMatchers, + }, + }, nil + case *parser.AggregateExpr: + if e.Op != parser.SUM { + return nil, NewNotSupportedError(fmt.Sprintf("'%s' aggregation", e.Op)) + } + + if e.Param != nil { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("unexpected parameter for %s aggregation: %s", e.Op, e.Param) + } + + if e.Without { + return nil, NewNotSupportedError("grouping with 'without'") + } + + slices.Sort(e.Grouping) + + inner, err := q.convertToOperator(e.Expr) + if err != nil { + return nil, err + } + + return &operator.Aggregation{ + Inner: inner, + Start: q.statement.Start, + End: q.statement.End, + Interval: interval, + Grouping: e.Grouping, + }, nil + case *parser.Call: + if e.Func.Name != "rate" { + return nil, NewNotSupportedError(fmt.Sprintf("'%s' function", e.Func.Name)) + } + + if len(e.Args) != 1 { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, fmt.Errorf("expected exactly one argument for rate, got %v", len(e.Args)) + } + + matrixSelector, ok := e.Args[0].(*parser.MatrixSelector) + if !ok { + // Should be caught by the PromQL parser, but we check here for safety. + return nil, NewNotSupportedError(fmt.Sprintf("unsupported rate argument type %T", e.Args[0])) + } + + vectorSelector := matrixSelector.VectorSelector.(*parser.VectorSelector) + + if vectorSelector.OriginalOffset != 0 || vectorSelector.Offset != 0 { + return nil, NewNotSupportedError("range vector selector with 'offset'") + } + + return &operator.RangeVectorSelectorWithTransformation{ + Selector: &operator.Selector{ + Queryable: q.queryable, + Start: timestamp.FromTime(q.statement.Start), + End: timestamp.FromTime(q.statement.End), + Timestamp: vectorSelector.Timestamp, + Interval: interval.Milliseconds(), + Range: matrixSelector.Range, + Matchers: vectorSelector.LabelMatchers, + }, + }, nil + case *parser.StepInvariantExpr: + // One day, we'll do something smarter here. 
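+		// A step-invariant expression produces the same value at every step, so it could be
+		// evaluated once and the result reused; for now, we recurse and evaluate it at every step
+		// like any other expression.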
+		return q.convertToOperator(e.Expr)
+	case *parser.ParenExpr:
+		return q.convertToOperator(e.Expr)
+	default:
+		return nil, NewNotSupportedError(fmt.Sprintf("PromQL expression type %T", e))
+	}
+}
+
+func (q *Query) IsInstant() bool {
+	return q.statement.Start == q.statement.End && q.statement.Interval == 0
+}
+
+func (q *Query) Exec(ctx context.Context) *promql.Result {
+	defer q.root.Close()
+
+	series, err := q.root.SeriesMetadata(ctx)
+	if err != nil {
+		return &promql.Result{Err: err}
+	}
+	defer operator.PutSeriesMetadataSlice(series)
+
+	if q.IsInstant() {
+		v, err := q.populateVector(ctx, series)
+		if err != nil {
+			return &promql.Result{Err: err}
+		}
+
+		q.result = &promql.Result{Value: v}
+	} else {
+		m, err := q.populateMatrix(ctx, series)
+		if err != nil {
+			return &promql.Result{Err: err}
+		}
+
+		q.result = &promql.Result{Value: m}
+	}
+
+	return q.result
+}
+
+func (q *Query) populateVector(ctx context.Context, series []operator.SeriesMetadata) (promql.Vector, error) {
+	ts := timeMilliseconds(q.statement.Start)
+	v := operator.GetVector(len(series))
+
+	for i, s := range series {
+		d, err := q.root.Next(ctx)
+		if err != nil {
+			if errors.Is(err, operator.EOS) {
+				return nil, fmt.Errorf("expected %v series, but only received %v", len(series), i)
+			}
+
+			return nil, err
+		}
+
+		if len(d.Floats)+len(d.Histograms) != 1 {
+			operator.PutFPointSlice(d.Floats)
+			// TODO: put histogram point slice back in pool
+
+			if len(d.Floats)+len(d.Histograms) == 0 {
+				continue
+			}
+
+			return nil, fmt.Errorf("expected exactly one sample for series %s, but got %v", s.Labels.String(), len(d.Floats)+len(d.Histograms))
+		}
+
+		point := d.Floats[0]
+		v = append(v, promql.Sample{
+			Metric: s.Labels,
+			T:      ts,
+			F:      point.F,
+		})
+
+		operator.PutFPointSlice(d.Floats)
+		// TODO: put histogram point slice back in pool
+	}
+
+	return v, nil
+}
+
+func (q *Query) populateMatrix(ctx context.Context, series []operator.SeriesMetadata) (promql.Matrix, error) {
+	m := operator.GetMatrix(len(series))
+
+	for i, s := range series {
+		d, err := q.root.Next(ctx)
+		if err != nil {
+			if errors.Is(err, operator.EOS) {
+				return nil, fmt.Errorf("expected %v series, but only received %v", len(series), i)
+			}
+
+			return nil, err
+		}
+
+		if len(d.Floats) == 0 && len(d.Histograms) == 0 {
+			operator.PutFPointSlice(d.Floats)
+			// TODO: put histogram point slice back in pool
+
+			continue
+		}
+
+		m = append(m, promql.Series{
+			Metric:     s.Labels,
+			Floats:     d.Floats,
+			Histograms: d.Histograms,
+		})
+	}
+
+	return m, nil
+}
+
+func (q *Query) Close() {
+	if q.result == nil {
+		return
+	}
+
+	switch v := q.result.Value.(type) {
+	case promql.Matrix:
+		for _, s := range v {
+			operator.PutFPointSlice(s.Floats)
+			// TODO: put histogram point slice back in pool
+		}
+
+		operator.PutMatrix(v)
+	case promql.Vector:
+		operator.PutVector(v)
+	default:
+		panic(fmt.Sprintf("unknown result value type %T", q.result.Value))
+	}
+}
+
+func (q *Query) Statement() parser.Statement {
+	return q.statement
+}
+
+func (q *Query) Stats() *stats.Statistics {
+	// Not yet supported.
+	return nil
+}
+
+func (q *Query) Cancel() {
+	// Not yet supported.
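+	// Callers can still abort the query's storage reads by cancelling the context passed to Exec.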
+} + +func (q *Query) String() string { + return q.qs +} + +func timeMilliseconds(t time.Time) int64 { + return t.UnixNano() / int64(time.Millisecond/time.Nanosecond) +} diff --git a/pkg/streamingpromql/testdata/ours/aggregators.test b/pkg/streamingpromql/testdata/ours/aggregators.test new file mode 100644 index 0000000000..decef36bce --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/aggregators.test @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: AGPL-3.0-only + +# Most cases for aggregation operators are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. + +load 1m + some_metric{env="prod", cluster="eu"} 0+1x4 + some_metric{env="prod", cluster="us"} 0+2x4 + some_metric{env="test", cluster="eu"} 0+3x4 + some_metric{env="test", cluster="us"} 0+4x4 + +# Range query, aggregating to one group. +eval range from 0 to 4m step 1m sum(some_metric) + {} 0 10 20 30 40 + +# Range query, aggregating to multiple groups. +eval range from 0 to 4m step 1m sum by (env) (some_metric) + {env="prod"} 0 3 6 9 12 + {env="test"} 0 7 14 21 28 + +# If no series are matched, we shouldn't return any results. +eval range from 0 to 4m step 1m sum(some_nonexistent_metric) + # Should return no results. + +clear + +load 1m + some_metric_with_staleness 1 stale 2 + +# If no non-stale points are available, we shouldn't return the series at all. +eval range from 1m to 1m30s step 1s sum(some_metric_with_staleness) + # Should return no results. + diff --git a/pkg/streamingpromql/testdata/ours/at_modifier.test b/pkg/streamingpromql/testdata/ours/at_modifier.test new file mode 100644 index 0000000000..8950a90068 --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/at_modifier.test @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: AGPL-3.0-only + +# Most cases for the @ modifier are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. + +load 10s + metric 0 1 2 3 4 5 + +# Specific timestamp aligned to an underlying point +eval range from 0 to 50s step 10s metric @ 20 + metric 2 2 2 2 2 2 + +# Specific timestamp not aligned to an underlying point +eval range from 0 to 50s step 10s metric @ 15 + metric 1 1 1 1 1 1 + +# Using start() +eval range from 0 to 50s step 10s metric @ start() + metric 0 0 0 0 0 0 + +# Using end(), end timestamp aligned to underlying point +eval range from 0 to 50s step 10s metric @ end() + metric 5 5 5 5 5 5 + +# Using end(), end timestamp not aligned to underlying point +eval range from 0 to 60s step 10s metric @ end() + metric 5 5 5 5 5 5 5 + +clear + +load 5m + metric 0 5 10 + +# Using end(), initial points outside lookback window +eval range from 0 to 10m step 1m metric @ end() + metric 10 10 10 10 10 10 10 10 10 10 10 diff --git a/pkg/streamingpromql/testdata/ours/functions.test b/pkg/streamingpromql/testdata/ours/functions.test new file mode 100644 index 0000000000..5bf1ae48e9 --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/functions.test @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-only + +# Most cases for functions are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. 
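+# The counters below increase by 60, 120, 180 and 240 per minute respectively,
+# so the expected per-second rates are 1, 2, 3 and 4.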
+ +load 1m + some_metric{env="prod", cluster="eu"} 0+60x4 + some_metric{env="prod", cluster="us"} 0+120x4 + some_metric{env="test", cluster="eu"} 0+180x4 + some_metric{env="test", cluster="us"} 0+240x4 + +# Range query with rate. +eval range from 0 to 4m step 1m rate(some_metric[1m]) + {env="prod", cluster="eu"} _ 1 1 1 1 + {env="prod", cluster="us"} _ 2 2 2 2 + {env="test", cluster="eu"} _ 3 3 3 3 + {env="test", cluster="us"} _ 4 4 4 4 + +# If no series are matched, we shouldn't return any results. +eval range from 0 to 4m step 1m rate(some_nonexistent_metric[1m]) + # Should return no results. diff --git a/pkg/streamingpromql/testdata/ours/selectors.test b/pkg/streamingpromql/testdata/ours/selectors.test new file mode 100644 index 0000000000..35f7e6984e --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/selectors.test @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: AGPL-3.0-only + +# Most cases for selector operators are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. + +load 1m + some_metric{env="prod", cluster="eu"} 0+1x4 + some_metric{env="prod", cluster="us"} 0+2x4 + some_metric{env="test", cluster="eu"} 0+3x4 + some_metric{env="test", cluster="us"} 0+4x4 + +# Range query with instant vector selector. +eval range from 0 to 4m step 1m some_metric + some_metric{env="prod", cluster="eu"} 0 1 2 3 4 + some_metric{env="prod", cluster="us"} 0 2 4 6 8 + some_metric{env="test", cluster="eu"} 0 3 6 9 12 + some_metric{env="test", cluster="us"} 0 4 8 12 16 + +# If no series are matched, we shouldn't return any results. +eval range from 0 to 4m step 1m some_nonexistent_metric + # Should return no results. diff --git a/pkg/streamingpromql/testdata/ours/staleness.test b/pkg/streamingpromql/testdata/ours/staleness.test new file mode 100644 index 0000000000..fcdb678669 --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/staleness.test @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/staleness.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +# Most cases for staleness are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. + +load 10s + metric 0 1 stale 2 + +# Test that we correctly handle stale markers. +# In particular, we shouldn't return points inside the 5m lookback window if we'd have to cross past a stale marker to get there. +eval range from 0s to 40s step 5s metric + metric 0 0 1 1 _ _ 2 2 2 + +# If no non-stale points are available, we shouldn't return the series at all. +eval range from 20s to 25s step 1s metric + # Should return no results. diff --git a/pkg/streamingpromql/testdata/upstream/README.md b/pkg/streamingpromql/testdata/upstream/README.md new file mode 100644 index 0000000000..a4e919dad1 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/README.md @@ -0,0 +1,7 @@ +This directory duplicates the test cases from https://github.com/prometheus/prometheus/tree/main/promql/testdata, used by `TestUpstreamTestCases` to ensure the streaming engine +produces the same results as Prometheus' engine. 
+ +Test cases that are not supported by the streaming engine are commented out with `Unsupported by streaming engine`. +If the entire file is not supported, appending `.disabled` to the file name disables it entirely. + +Once the streaming engine supports all PromQL features exercised by Prometheus' test cases, we can remove these files and instead call `promql.RunBuiltinTests` from our tests. diff --git a/pkg/streamingpromql/testdata/upstream/aggregators.test b/pkg/streamingpromql/testdata/upstream/aggregators.test new file mode 100644 index 0000000000..bbf370f4ff --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/aggregators.test @@ -0,0 +1,592 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/aggregators.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + +load 5m + foo{job="api-server", instance="0", region="europe"} 0+90x10 + foo{job="api-server"} 0+100x10 + +# Simple sum. +eval instant at 50m SUM BY (group) (http_requests{job="api-server"}) + {group="canary"} 700 + {group="production"} 300 + +eval instant at 50m SUM BY (group) (((http_requests{job="api-server"}))) + {group="canary"} 700 + {group="production"} 300 + +# Test alternative "by"-clause order. +eval instant at 50m sum by (group) (http_requests{job="api-server"}) + {group="canary"} 700 + {group="production"} 300 + +# Simple average. +# Unsupported by streaming engine. +# eval instant at 50m avg by (group) (http_requests{job="api-server"}) +# {group="canary"} 350 +# {group="production"} 150 + +# Simple count. +# Unsupported by streaming engine. +# eval instant at 50m count by (group) (http_requests{job="api-server"}) +# {group="canary"} 2 +# {group="production"} 2 + +# Simple without. +# Unsupported by streaming engine. +# eval instant at 50m sum without (instance) (http_requests{job="api-server"}) +# {group="canary",job="api-server"} 700 +# {group="production",job="api-server"} 300 + +# Empty by. +eval instant at 50m sum by () (http_requests{job="api-server"}) + {} 1000 + +# No by/without. +eval instant at 50m sum(http_requests{job="api-server"}) + {} 1000 + +# Empty without. +# Unsupported by streaming engine. +# eval instant at 50m sum without () (http_requests{job="api-server",group="production"}) +# {group="production",job="api-server",instance="0"} 100 +# {group="production",job="api-server",instance="1"} 200 + +# Without with mismatched and missing labels. Do not do this. +# Unsupported by streaming engine. +# eval instant at 50m sum without (instance) (http_requests{job="api-server"} or foo) +# {group="canary",job="api-server"} 700 +# {group="production",job="api-server"} 300 +# {region="europe",job="api-server"} 900 +# {job="api-server"} 1000 + +# Lower-cased aggregation operators should work too. +# Unsupported by streaming engine. 
+# eval instant at 50m sum(http_requests) by (job) + min(http_requests) by (job) + max(http_requests) by (job) + avg(http_requests) by (job)
+#   {job="app-server"} 4550
+#   {job="api-server"} 1750
+
+# Test alternative "by"-clause order.
+eval instant at 50m sum by (group) (http_requests{job="api-server"})
+  {group="canary"} 700
+  {group="production"} 300
+
+# Test both alternative "by"-clause orders in one expression.
+# Public health warning: stick to one form within an expression (or even
+# in an organization), or risk serious user confusion.
+eval instant at 50m sum(sum by (group) (http_requests{job="api-server"})) by (job)
+  {} 1000
+
+eval instant at 50m SUM(http_requests)
+  {} 3600
+
+eval instant at 50m SUM(http_requests{instance="0"}) BY(job)
+  {job="api-server"} 400
+  {job="app-server"} 1200
+
+eval instant at 50m SUM(http_requests) BY (job)
+  {job="api-server"} 1000
+  {job="app-server"} 2600
+
+# Non-existent labels mentioned in BY-clauses shouldn't propagate to output.
+eval instant at 50m SUM(http_requests) BY (job, nonexistent)
+  {job="api-server"} 1000
+  {job="app-server"} 2600
+
+# Unsupported by streaming engine.
+# eval instant at 50m COUNT(http_requests) BY (job)
+#   {job="api-server"} 4
+#   {job="app-server"} 4
+
+eval instant at 50m SUM(http_requests) BY (job, group)
+  {group="canary", job="api-server"} 700
+  {group="canary", job="app-server"} 1500
+  {group="production", job="api-server"} 300
+  {group="production", job="app-server"} 1100
+
+# Unsupported by streaming engine.
+# eval instant at 50m AVG(http_requests) BY (job)
+#   {job="api-server"} 250
+#   {job="app-server"} 650
+
+# Unsupported by streaming engine.
+# eval instant at 50m MIN(http_requests) BY (job)
+#   {job="api-server"} 100
+#   {job="app-server"} 500
+
+# Unsupported by streaming engine.
+# eval instant at 50m MAX(http_requests) BY (job)
+#   {job="api-server"} 400
+#   {job="app-server"} 800
+
+# Unsupported by streaming engine.
+# eval instant at 50m abs(-1 * http_requests{group="production",job="api-server"})
+#   {group="production", instance="0", job="api-server"} 100
+#   {group="production", instance="1", job="api-server"} 200
+
+# Unsupported by streaming engine.
+# eval instant at 50m floor(0.004 * http_requests{group="production",job="api-server"})
+#   {group="production", instance="0", job="api-server"} 0
+#   {group="production", instance="1", job="api-server"} 0
+
+# Unsupported by streaming engine.
+# eval instant at 50m ceil(0.004 * http_requests{group="production",job="api-server"})
+#   {group="production", instance="0", job="api-server"} 1
+#   {group="production", instance="1", job="api-server"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 50m round(0.004 * http_requests{group="production",job="api-server"})
+#   {group="production", instance="0", job="api-server"} 0
+#   {group="production", instance="1", job="api-server"} 1
+
+# Round should correctly handle negative numbers.
+# Unsupported by streaming engine.
+# eval instant at 50m round(-1 * (0.004 * http_requests{group="production",job="api-server"}))
+#   {group="production", instance="0", job="api-server"} 0
+#   {group="production", instance="1", job="api-server"} -1
+
+# Round should round half up.
+# Unsupported by streaming engine.
+# eval instant at 50m round(0.005 * http_requests{group="production",job="api-server"})
+#   {group="production", instance="0", job="api-server"} 1
+#   {group="production", instance="1", job="api-server"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 50m round(-1 * (0.005 * http_requests{group="production",job="api-server"})) +# {group="production", instance="0", job="api-server"} 0 +# {group="production", instance="1", job="api-server"} -1 + +# Unsupported by streaming engine. +# eval instant at 50m round(1 + 0.005 * http_requests{group="production",job="api-server"}) +# {group="production", instance="0", job="api-server"} 2 +# {group="production", instance="1", job="api-server"} 2 + +# Unsupported by streaming engine. +# eval instant at 50m round(-1 * (1 + 0.005 * http_requests{group="production",job="api-server"})) +# {group="production", instance="0", job="api-server"} -1 +# {group="production", instance="1", job="api-server"} -2 + +# Round should accept the number to round nearest to. +# Unsupported by streaming engine. +# eval instant at 50m round(0.0005 * http_requests{group="production",job="api-server"}, 0.1) +# {group="production", instance="0", job="api-server"} 0.1 +# {group="production", instance="1", job="api-server"} 0.1 + +# Unsupported by streaming engine. +# eval instant at 50m round(2.1 + 0.0005 * http_requests{group="production",job="api-server"}, 0.1) +# {group="production", instance="0", job="api-server"} 2.2 +# {group="production", instance="1", job="api-server"} 2.2 + +# Unsupported by streaming engine. +# eval instant at 50m round(5.2 + 0.0005 * http_requests{group="production",job="api-server"}, 0.1) +# {group="production", instance="0", job="api-server"} 5.3 +# {group="production", instance="1", job="api-server"} 5.3 + +# Round should work correctly with negative numbers and multiple decimal places. +# Unsupported by streaming engine. +# eval instant at 50m round(-1 * (5.2 + 0.0005 * http_requests{group="production",job="api-server"}), 0.1) +# {group="production", instance="0", job="api-server"} -5.2 +# {group="production", instance="1", job="api-server"} -5.3 + +# Round should work correctly with big toNearests. +# Unsupported by streaming engine. +# eval instant at 50m round(0.025 * http_requests{group="production",job="api-server"}, 5) +# {group="production", instance="0", job="api-server"} 5 +# {group="production", instance="1", job="api-server"} 5 + +# Unsupported by streaming engine. +# eval instant at 50m round(0.045 * http_requests{group="production",job="api-server"}, 5) +# {group="production", instance="0", job="api-server"} 5 +# {group="production", instance="1", job="api-server"} 10 + +# Standard deviation and variance. +# Unsupported by streaming engine. +# eval instant at 50m stddev(http_requests) +# {} 229.12878474779 + +# Unsupported by streaming engine. +# eval instant at 50m stddev by (instance)(http_requests) +# {instance="0"} 223.60679774998 +# {instance="1"} 223.60679774998 + +# Unsupported by streaming engine. +# eval instant at 50m stdvar(http_requests) +# {} 52500 + +# Unsupported by streaming engine. +# eval instant at 50m stdvar by (instance)(http_requests) +# {instance="0"} 50000 +# {instance="1"} 50000 + +# Float precision test for standard deviation and variance +clear +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+1.33x10 + http_requests{job="api-server", instance="1", group="production"} 0+1.33x10 + http_requests{job="api-server", instance="0", group="canary"} 0+1.33x10 + +# Unsupported by streaming engine. +# eval instant at 50m stddev(http_requests) +# {} 0.0 + +# Unsupported by streaming engine. +# eval instant at 50m stdvar(http_requests) +# {} 0.0 + + +# Regression test for missing separator byte in labelsToGroupingKey. 
+clear +load 5m + label_grouping_test{a="aa", b="bb"} 0+10x10 + label_grouping_test{a="a", b="abb"} 0+20x10 + +# Unsupported by streaming engine. +# eval instant at 50m sum(label_grouping_test) by (a, b) +# {a="a", b="abb"} 200 +# {a="aa", b="bb"} 100 + + + +# Tests for min/max. +clear +load 5m + http_requests{job="api-server", instance="0", group="production"} 1 + http_requests{job="api-server", instance="1", group="production"} 2 + http_requests{job="api-server", instance="0", group="canary"} NaN + http_requests{job="api-server", instance="1", group="canary"} 3 + http_requests{job="api-server", instance="2", group="canary"} 4 + +# Unsupported by streaming engine. +# eval instant at 0m max(http_requests) +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 0m min(http_requests) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m max by (group) (http_requests) +# {group="production"} 2 +# {group="canary"} 4 + +# Unsupported by streaming engine. +# eval instant at 0m min by (group) (http_requests) +# {group="production"} 1 +# {group="canary"} 3 + +clear + +# Tests for topk/bottomk. +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + foo 3+0x10 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk(3, http_requests) +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="1", job="app-server"} 600 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk((3), (http_requests)) +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="1", job="app-server"} 600 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk(5, http_requests{group="canary",job="app-server"}) +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="0", job="app-server"} 700 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m bottomk(3, http_requests) +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="0", job="api-server"} 300 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m bottomk(5, http_requests{group="canary",job="app-server"}) +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +# Unsupported by streaming engine. 
+# eval instant at 50m topk by (group) (1, http_requests) +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +# Unsupported by streaming engine. +# eval instant at 50m bottomk by (group) (2, http_requests) +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="1", job="api-server"} 200 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m bottomk by (group) (2, http_requests{group="production"}) +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="1", job="api-server"} 200 + +# Test NaN is sorted away from the top/bottom. +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"}) +# http_requests{job="api-server", instance="1", group="production"} 200 +# http_requests{job="api-server", instance="0", group="production"} 100 +# http_requests{job="api-server", instance="2", group="production"} NaN + +# Unsupported by streaming engine. +# eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="production"}) +# http_requests{job="api-server", instance="0", group="production"} 100 +# http_requests{job="api-server", instance="1", group="production"} 200 +# http_requests{job="api-server", instance="2", group="production"} NaN + +# Test topk and bottomk allocate min(k, input_vector) for results vector +# Unsupported by streaming engine. +# eval_ordered instant at 50m bottomk(9999999999, http_requests{job="app-server",group="canary"}) +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk(9999999999, http_requests{job="api-server",group="production"}) +# http_requests{job="api-server", instance="1", group="production"} 200 +# http_requests{job="api-server", instance="0", group="production"} 100 +# http_requests{job="api-server", instance="2", group="production"} NaN + +# Bug #5276. +# Unsupported by streaming engine. +# eval_ordered instant at 50m topk(scalar(foo), http_requests) +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="1", job="app-server"} 600 + +clear + +# Tests for count_values. +load 5m + version{job="api-server", instance="0", group="production"} 6 + version{job="api-server", instance="1", group="production"} 6 + version{job="api-server", instance="2", group="production"} 6 + version{job="api-server", instance="0", group="canary"} 8 + version{job="api-server", instance="1", group="canary"} 8 + version{job="app-server", instance="0", group="production"} 6 + version{job="app-server", instance="1", group="production"} 6 + version{job="app-server", instance="0", group="canary"} 7 + version{job="app-server", instance="1", group="canary"} 7 + +# Unsupported by streaming engine. +# eval instant at 5m count_values("version", version) +# {version="6"} 5 +# {version="7"} 2 +# {version="8"} 2 + + +# Unsupported by streaming engine. 
+# eval instant at 5m count_values(((("version"))), version) +# {version="6"} 5 +# {version="7"} 2 +# {version="8"} 2 + + +# Unsupported by streaming engine. +# eval instant at 5m count_values without (instance)("version", version) +# {job="api-server", group="production", version="6"} 3 +# {job="api-server", group="canary", version="8"} 2 +# {job="app-server", group="production", version="6"} 2 +# {job="app-server", group="canary", version="7"} 2 + +# Overwrite label with output. Don't do this. +# Unsupported by streaming engine. +# eval instant at 5m count_values without (instance)("job", version) +# {job="6", group="production"} 5 +# {job="8", group="canary"} 2 +# {job="7", group="canary"} 2 + +# Overwrite label with output. Don't do this. +# Unsupported by streaming engine. +# eval instant at 5m count_values by (job, group)("job", version) +# {job="6", group="production"} 5 +# {job="8", group="canary"} 2 +# {job="7", group="canary"} 2 + + +# Tests for quantile. +clear + +load 10s + data{test="two samples",point="a"} 0 + data{test="two samples",point="b"} 1 + data{test="three samples",point="a"} 0 + data{test="three samples",point="b"} 1 + data{test="three samples",point="c"} 2 + data{test="uneven samples",point="a"} 0 + data{test="uneven samples",point="b"} 1 + data{test="uneven samples",point="c"} 4 + foo .8 + +# Unsupported by streaming engine. +# eval instant at 1m quantile without(point)(0.8, data) +# {test="two samples"} 0.8 +# {test="three samples"} 1.6 +# {test="uneven samples"} 2.8 + +# Bug #5276. +# Unsupported by streaming engine. +# eval instant at 1m quantile without(point)(scalar(foo), data) +# {test="two samples"} 0.8 +# {test="three samples"} 1.6 +# {test="uneven samples"} 2.8 + + +# Unsupported by streaming engine. +# eval instant at 1m quantile without(point)((scalar(foo)), data) +# {test="two samples"} 0.8 +# {test="three samples"} 1.6 +# {test="uneven samples"} 2.8 + +# Unsupported by streaming engine. +# eval instant at 1m quantile without(point)(NaN, data) +# {test="two samples"} NaN +# {test="three samples"} NaN +# {test="uneven samples"} NaN + +# Tests for group. +clear + +load 10s + data{test="two samples",point="a"} 0 + data{test="two samples",point="b"} 1 + data{test="three samples",point="a"} 0 + data{test="three samples",point="b"} 1 + data{test="three samples",point="c"} 2 + data{test="uneven samples",point="a"} 0 + data{test="uneven samples",point="b"} 1 + data{test="uneven samples",point="c"} 4 + foo .8 + +# Unsupported by streaming engine. +# eval instant at 1m group without(point)(data) +# {test="two samples"} 1 +# {test="three samples"} 1 +# {test="uneven samples"} 1 + +# Unsupported by streaming engine. +# eval instant at 1m group(foo) +# {} 1 + +# Tests for avg. 
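+# The cases below pin down floating-point edge cases: a group whose infinite
+# values all share one sign averages to that infinity, mixing +Inf and -Inf
+# in one group yields NaN, and values near the float64 maximum
+# (9.988465674311579e+307) must not overflow the way a naive sum/count
+# implementation would.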
+clear + +load 10s + data{test="ten",point="a"} 8 + data{test="ten",point="b"} 10 + data{test="ten",point="c"} 12 + data{test="inf",point="a"} 0 + data{test="inf",point="b"} Inf + data{test="inf",point="d"} Inf + data{test="inf",point="c"} 0 + data{test="-inf",point="a"} -Inf + data{test="-inf",point="b"} -Inf + data{test="-inf",point="c"} 0 + data{test="inf2",point="a"} Inf + data{test="inf2",point="b"} 0 + data{test="inf2",point="c"} Inf + data{test="-inf2",point="a"} -Inf + data{test="-inf2",point="b"} 0 + data{test="-inf2",point="c"} -Inf + data{test="inf3",point="b"} Inf + data{test="inf3",point="d"} Inf + data{test="inf3",point="c"} Inf + data{test="inf3",point="d"} -Inf + data{test="-inf3",point="b"} -Inf + data{test="-inf3",point="d"} -Inf + data{test="-inf3",point="c"} -Inf + data{test="-inf3",point="c"} Inf + data{test="nan",point="a"} -Inf + data{test="nan",point="b"} 0 + data{test="nan",point="c"} Inf + data{test="big",point="a"} 9.988465674311579e+307 + data{test="big",point="b"} 9.988465674311579e+307 + data{test="big",point="c"} 9.988465674311579e+307 + data{test="big",point="d"} 9.988465674311579e+307 + data{test="-big",point="a"} -9.988465674311579e+307 + data{test="-big",point="b"} -9.988465674311579e+307 + data{test="-big",point="c"} -9.988465674311579e+307 + data{test="-big",point="d"} -9.988465674311579e+307 + data{test="bigzero",point="a"} -9.988465674311579e+307 + data{test="bigzero",point="b"} -9.988465674311579e+307 + data{test="bigzero",point="c"} 9.988465674311579e+307 + data{test="bigzero",point="d"} 9.988465674311579e+307 + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="ten"}) +# {} 10 + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="inf"}) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="inf2"}) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="inf3"}) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="-inf"}) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="-inf2"}) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="-inf3"}) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="nan"}) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="big"}) +# {} 9.988465674311579e+307 + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="-big"}) +# {} -9.988465674311579e+307 + +# Unsupported by streaming engine. +# eval instant at 1m avg(data{test="bigzero"}) +# {} 0 + +clear + +# Test that aggregations are deterministic. +# Commented because it is flaky in range mode. 
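+# Both up series have the value 1, so topk(1, ...) must break the tie: the
+# chained == comparisons only produce a result if every one of the five
+# invocations picks the same series, i.e. if tie-breaking is deterministic.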
+#load 10s +# up{job="prometheus"} 1 +# up{job="prometheus2"} 1 +# +#eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) +# {} 1 diff --git a/pkg/streamingpromql/testdata/upstream/at_modifier.test b/pkg/streamingpromql/testdata/upstream/at_modifier.test new file mode 100644 index 0000000000..5909dc7659 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/at_modifier.test @@ -0,0 +1,210 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/at_modifier.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 10s + metric{job="1"} 0+1x1000 + metric{job="2"} 0+2x1000 + +load 1ms + metric_ms 0+1x10000 + +# Instant vector selectors. +eval instant at 10s metric @ 100 + metric{job="1"} 10 + metric{job="2"} 20 + +# Unsupported by streaming engine. +# eval instant at 10s metric @ 100 offset 50s +# metric{job="1"} 5 +# metric{job="2"} 10 + +# Unsupported by streaming engine. +# eval instant at 10s metric offset 50s @ 100 +# metric{job="1"} 5 +# metric{job="2"} 10 + +# Unsupported by streaming engine. +# eval instant at 10s metric @ 0 offset -50s +# metric{job="1"} 5 +# metric{job="2"} 10 + +# Unsupported by streaming engine. +# eval instant at 10s metric offset -50s @ 0 +# metric{job="1"} 5 +# metric{job="2"} 10 + +# Unsupported by streaming engine. +# eval instant at 10s -metric @ 100 +# {job="1"} -10 +# {job="2"} -20 + +# Unsupported by streaming engine. +# eval instant at 10s ---metric @ 100 +# {job="1"} -10 +# {job="2"} -20 + +# Millisecond precision. +# eval instant at 100s metric_ms @ 1.234 +# metric_ms 1234 + +# Range vector selectors. +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s] @ 100) +# {job="1"} 55 + +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s] @ 100 offset 50s) +# {job="1"} 15 + +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s] offset 50s @ 100) +# {job="1"} 15 + +# Different timestamps. +# Unsupported by streaming engine. +# eval instant at 25s metric{job="1"} @ 50 + metric{job="1"} @ 100 +# {job="1"} 15 + +# Unsupported by streaming engine. +# eval instant at 25s rate(metric{job="1"}[100s] @ 100) + label_replace(rate(metric{job="2"}[123s] @ 200), "job", "1", "", "") +# {job="1"} 0.3 + +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s] @ 100) + label_replace(sum_over_time(metric{job="2"}[100s] @ 100), "job", "1", "", "") +# {job="1"} 165 + +# Subqueries. + +# 10*(1+2+...+9) + 10. +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s:1s] @ 100) +# {job="1"} 460 + +# 10*(1+2+...+7) + 8. +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s:1s] @ 100 offset 20s) +# {job="1"} 288 + +# 10*(1+2+...+7) + 8. +# Unsupported by streaming engine. +# eval instant at 25s sum_over_time(metric{job="1"}[100s:1s] offset 20s @ 100) +# {job="1"} 288 + +# Subquery with different timestamps. + +# Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. +# Inner most sum=1+2+...+10=55. +# With [100s:25s] subquery, it's 55*5. +# Unsupported by streaming engine. 
+# eval instant at 100s sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50) +# {job="1"} 275 + +# Nested subqueries with different timestamps on both. + +# Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. +# Sum of innermost subquery is 275 as above. The outer subquery repeats it 4 times. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50)[3s:1s] @ 3000) +# {job="1"} 1100 + +# Testing the inner subquery timestamp since vector selector does not have @. + +# Inner sum for subquery [100s:25s] @ 50 are +# at -50 nothing, at -25 nothing, at 0=0, at 25=2, at 50=4+5=9. +# This sum of 11 is repeated 4 times by outer subquery. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 50)[3s:1s] @ 200) +# {job="1"} 44 + +# Inner sum for subquery [100s:25s] @ 200 are +# at 100=9+10, at 125=12, at 150=14+15, at 175=17, at 200=19+20. +# This sum of 116 is repeated 4 times by outer subquery. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 200)[3s:1s] @ 50) +# {job="1"} 464 + +# Nested subqueries with timestamp only on outer subquery. +# Outer most subquery: +# at 900=783 +# inner subquery: at 870=87+86+85, at 880=88+87+86, at 890=89+88+87 +# at 925=537 +# inner subquery: at 895=89+88, at 905=90+89, at 915=90+91 +# at 950=828 +# inner subquery: at 920=92+91+90, at 930=93+92+91, at 940=94+93+92 +# at 975=567 +# inner subquery: at 945=94+93, at 955=95+94, at 965=96+95 +# at 1000=873 +# inner subquery: at 970=97+96+95, at 980=98+97+96, at 990=99+98+97 +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[20s])[20s:10s] offset 10s)[100s:25s] @ 1000) +# {job="1"} 3588 + +# minute is counted on the value of the sample. +# Unsupported by streaming engine. +# eval instant at 10s minute(metric @ 1500) +# {job="1"} 2 +# {job="2"} 5 + +# timestamp() takes the time of the sample and not the evaluation time. +# Unsupported by streaming engine. +# eval instant at 10m timestamp(metric{job="1"} @ 10) +# {job="1"} 10 + +# The result of inner timestamp() will have the timestamp as the +# eval time, hence entire expression is not step invariant and depends on eval time. +# Unsupported by streaming engine. +# eval instant at 10m timestamp(timestamp(metric{job="1"} @ 10)) +# {job="1"} 600 + +# Unsupported by streaming engine. +# eval instant at 15m timestamp(timestamp(metric{job="1"} @ 10)) +# {job="1"} 900 + +# Time functions inside a subquery. + +# minute is counted on the value of the sample. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(minute(metric @ 1500)[100s:10s]) +# {job="1"} 22 +# {job="2"} 55 + +# If nothing passed, minute() takes eval time. +# Here the eval time is determined by the subquery. +# [50m:1m] at 6000, i.e. 100m, is 50m to 100m. +# sum=50+51+52+...+59+0+1+2+...+40. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000) +# {} 1365 + +# sum=45+46+47+...+59+0+1+2+...+35. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000 offset 5m) +# {} 1410 + +# time() is the eval time which is determined by subquery here. +# 2900+2901+...+3000 = (3000*3001 - 2899*2900)/2. +# Unsupported by streaming engine. 
+# eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000) +# {} 297950 + +# 2300+2301+...+2400 = (2400*2401 - 2299*2300)/2. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000 offset 600s) +# {} 237350 + +# timestamp() takes the time of the sample and not the evaluation time. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(timestamp(metric{job="1"} @ 10)[100s:10s] @ 3000) +# {job="1"} 110 + +# The result of inner timestamp() will have the timestamp as the +# eval time, hence entire expression is not step invariant and depends on eval time. +# Here eval time is determined by the subquery. +# Unsupported by streaming engine. +# eval instant at 0s sum_over_time(timestamp(timestamp(metric{job="1"} @ 999))[10s:1s] @ 10) +# {job="1"} 55 + + +clear diff --git a/pkg/streamingpromql/testdata/upstream/collision.test.disabled b/pkg/streamingpromql/testdata/upstream/collision.test.disabled new file mode 100644 index 0000000000..63139a765f --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/collision.test.disabled @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/collision.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 1s + node_namespace_pod:kube_pod_info:{namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + node_cpu_seconds_total{cpu="10",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + node_cpu_seconds_total{cpu="35",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + node_cpu_seconds_total{cpu="89",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + +eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"} + {cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + +clear + +# Test duplicate labelset in promql output. 
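+# ceil() drops the metric name, so the two series below both collapse to the
+# labelset {src="a", dst="b"}; duplicate labelsets in the output are an
+# error, hence eval_fail.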
+load 5m + testmetric1{src="a",dst="b"} 0 + testmetric2{src="a",dst="b"} 1 + +eval_fail instant at 0m ceil({__name__=~'testmetric1|testmetric2'}) + +clear diff --git a/pkg/streamingpromql/testdata/upstream/functions.test b/pkg/streamingpromql/testdata/upstream/functions.test new file mode 100644 index 0000000000..66bb0d8006 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/functions.test @@ -0,0 +1,1426 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/functions.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +# Testdata for resets() and changes(). +load 5m + http_requests{path="/foo"} 1 2 3 0 1 0 0 1 2 0 + http_requests{path="/bar"} 1 2 3 4 5 1 2 3 4 5 + http_requests{path="/biz"} 0 0 0 0 0 1 1 1 1 1 + +# Tests for resets(). +# Unsupported by streaming engine. +# eval instant at 50m resets(http_requests[5m]) +# {path="/foo"} 0 +# {path="/bar"} 0 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m resets(http_requests[20m]) +# {path="/foo"} 1 +# {path="/bar"} 0 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m resets(http_requests[30m]) +# {path="/foo"} 2 +# {path="/bar"} 1 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m resets(http_requests[50m]) +# {path="/foo"} 3 +# {path="/bar"} 1 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m resets(nonexistent_metric[50m]) + +# Tests for changes(). +# Unsupported by streaming engine. +# eval instant at 50m changes(http_requests[5m]) +# {path="/foo"} 0 +# {path="/bar"} 0 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m changes(http_requests[20m]) +# {path="/foo"} 3 +# {path="/bar"} 3 +# {path="/biz"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m changes(http_requests[30m]) +# {path="/foo"} 4 +# {path="/bar"} 5 +# {path="/biz"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m changes(http_requests[50m]) +# {path="/foo"} 8 +# {path="/bar"} 9 +# {path="/biz"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m changes((http_requests[50m])) +# {path="/foo"} 8 +# {path="/bar"} 9 +# {path="/biz"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m changes(nonexistent_metric[50m]) + +clear + +load 5m + x{a="b"} NaN NaN NaN + x{a="c"} 0 NaN 0 + +# Unsupported by streaming engine. +# eval instant at 15m changes(x[15m]) +# {a="b"} 0 +# {a="c"} 2 + +clear + +# Tests for increase(). +load 5m + http_requests{path="/foo"} 0+10x10 + http_requests{path="/bar"} 0+10x5 0+10x5 + http_requests{path="/dings"} 10+10x10 + http_requests{path="/bumms"} 1+10x10 + +# Tests for increase(). +# Unsupported by streaming engine. +# eval instant at 50m increase(http_requests[50m]) +# {path="/foo"} 100 +# {path="/bar"} 90 +# {path="/dings"} 100 +# {path="/bumms"} 100 + +# "foo" and "bar" are already at value 0 at t=0, so no extrapolation +# happens. "dings" has value 10 at t=0 and would reach 0 at t=-5m. The +# normal extrapolation by half a sample interval only goes to +# t=-2m30s, so that's not yet reaching a negative value and therefore +# chosen. However, "bumms" has value 1 at t=0 and would reach 0 at +# t=-30s. Here the extrapolation to t=-2m30s would reach a negative +# value, and therefore the extrapolation happens only by 30s. +# Unsupported by streaming engine. 
+# eval instant at 50m increase(http_requests[100m])
+# {path="/foo"} 100
+# {path="/bar"} 90
+# {path="/dings"} 105
+# {path="/bumms"} 101
+
+clear
+
+# Test for increase() with counter reset.
+# When the counter is reset, it always starts at 0.
+# So the sequence 3 2 (decreasing counter = reset) is interpreted the same as 3 0 1 2.
+# Prometheus assumes it missed the intermediate values 0 and 1.
+load 5m
+ http_requests{path="/foo"} 0 1 2 3 2 3 4
+
+# Unsupported by streaming engine.
+# eval instant at 30m increase(http_requests[30m])
+# {path="/foo"} 7
+
+clear
+
+# Tests for rate().
+load 5m
+ testcounter_reset_middle 0+10x4 0+10x5
+ testcounter_reset_end 0+10x9 0 10
+
+# Counter resets in the middle of the range are handled correctly by rate().
+eval instant at 50m rate(testcounter_reset_middle[50m])
+ {} 0.03
+
+# Counter resets at the end of the range are ignored by rate().
+eval instant at 50m rate(testcounter_reset_end[5m])
+ {} 0
+
+clear
+
+load 5m
+ calculate_rate_offset{x="a"} 0+10x10
+ calculate_rate_offset{x="b"} 0+20x10
+ calculate_rate_window 0+80x10
+
+# rate() should return per-second rates.
+eval instant at 50m rate(calculate_rate_window[50m])
+ {} 0.26666666666666666
+
+# Unsupported by streaming engine.
+# eval instant at 50m rate(calculate_rate_offset[10m] offset 5m)
+# {x="a"} 0.03333333333333333
+# {x="b"} 0.06666666666666667
+
+clear
+
+load 4m
+ testcounter_zero_cutoff{start="0m"} 0+240x10
+ testcounter_zero_cutoff{start="1m"} 60+240x10
+ testcounter_zero_cutoff{start="2m"} 120+240x10
+ testcounter_zero_cutoff{start="3m"} 180+240x10
+ testcounter_zero_cutoff{start="4m"} 240+240x10
+ testcounter_zero_cutoff{start="5m"} 300+240x10
+
+# Zero cutoff for left-side extrapolation happens until we
+# reach half a sampling interval (2m). Beyond that, we only
+# extrapolate by half a sampling interval.
+eval instant at 10m rate(testcounter_zero_cutoff[20m])
+ {start="0m"} 0.5
+ {start="1m"} 0.55
+ {start="2m"} 0.6
+ {start="3m"} 0.6
+ {start="4m"} 0.6
+ {start="5m"} 0.6
+
+# Normal half-interval cutoff for left-side extrapolation.
+eval instant at 50m rate(testcounter_zero_cutoff[20m])
+ {start="0m"} 0.6
+ {start="1m"} 0.6
+ {start="2m"} 0.6
+ {start="3m"} 0.6
+ {start="4m"} 0.6
+ {start="5m"} 0.6
+
+clear
+
+# Tests for irate().
+load 5m
+ http_requests{path="/foo"} 0+10x10
+ http_requests{path="/bar"} 0+10x5 0+10x5
+
+# Unsupported by streaming engine.
+# eval instant at 50m irate(http_requests[50m])
+# {path="/foo"} .03333333333333333333
+# {path="/bar"} .03333333333333333333
+
+# Counter reset.
+# Unsupported by streaming engine.
+# eval instant at 30m irate(http_requests[50m])
+# {path="/foo"} .03333333333333333333
+# {path="/bar"} 0
+
+clear
+
+# Tests for delta().
+load 5m
+ http_requests{path="/foo"} 0 50 100 150 200
+ http_requests{path="/bar"} 200 150 100 50 0
+
+# Unsupported by streaming engine.
+# eval instant at 20m delta(http_requests[20m])
+# {path="/foo"} 200
+# {path="/bar"} -200
+
+clear
+
+# Tests for idelta().
+load 5m
+ http_requests{path="/foo"} 0 50 100 150
+ http_requests{path="/bar"} 0 50 100 50
+
+# Unsupported by streaming engine.
+# eval instant at 20m idelta(http_requests[20m])
+# {path="/foo"} 50
+# {path="/bar"} -50
+
+clear
+
+# Tests for deriv() and predict_linear().
+load 5m
+ testcounter_reset_middle 0+10x4 0+10x5
+ http_requests{job="app-server", instance="1", group="canary"} 0+80x10
+
+# deriv should return the same as rate in simple cases.
+# Unsupported by streaming engine.
+# eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m]) +# {group="canary", instance="1", job="app-server"} 0.26666666666666666 + +# Unsupported by streaming engine. +# eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m]) +# {group="canary", instance="1", job="app-server"} 0.26666666666666666 + +# deriv should return correct result. +# Unsupported by streaming engine. +# eval instant at 50m deriv(testcounter_reset_middle[100m]) +# {} 0.010606060606060607 + +# predict_linear should return correct result. +# X/s = [ 0, 300, 600, 900,1200,1500,1800,2100,2400,2700,3000] +# Y = [ 0, 10, 20, 30, 40, 0, 10, 20, 30, 40, 50] +# sumX = 16500 +# sumY = 250 +# sumXY = 480000 +# sumX2 = 34650000 +# n = 11 +# covXY = 105000 +# varX = 9900000 +# slope = 0.010606060606060607 +# intercept at t=0: 6.818181818181818 +# intercept at t=3000: 38.63636363636364 +# intercept at t=3000+3600: 76.81818181818181 +# Unsupported by streaming engine. +# eval instant at 50m predict_linear(testcounter_reset_middle[50m], 3600) +# {} 76.81818181818181 + +# intercept at t = 3000+3600 = 6600 +# Unsupported by streaming engine. +# eval instant at 50m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +# {} 76.81818181818181 + +# intercept at t = 600+3600 = 4200 +# Unsupported by streaming engine. +# eval instant at 10m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +# {} 51.36363636363637 + +# intercept at t = 4200+3600 = 7800 +# Unsupported by streaming engine. +# eval instant at 70m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +# {} 89.54545454545455 + +# With http_requests, there is a sample value exactly at the end of +# the range, and it has exactly the predicted value, so predict_linear +# can be emulated with deriv. +# Unsupported by streaming engine. +# eval instant at 50m predict_linear(http_requests[50m], 3600) - (http_requests + deriv(http_requests[50m]) * 3600) +# {group="canary", instance="1", job="app-server"} 0 + +clear + +# Tests for label_replace. +load 5m + testmetric{src="source-value-10",dst="original-destination-value"} 0 + testmetric{src="source-value-20",dst="original-destination-value"} 1 + +# label_replace does a full-string match and replace. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "destination-value-$1", "src", "source-value-(.*)") +# testmetric{src="source-value-10",dst="destination-value-10"} 0 +# testmetric{src="source-value-20",dst="destination-value-20"} 1 + +# label_replace does not do a sub-string match. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "destination-value-$1", "src", "value-(.*)") +# testmetric{src="source-value-10",dst="original-destination-value"} 0 +# testmetric{src="source-value-20",dst="original-destination-value"} 1 + +# label_replace works with multiple capture groups. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "$1-value-$2", "src", "(.*)-value-(.*)") +# testmetric{src="source-value-10",dst="source-value-10"} 0 +# testmetric{src="source-value-20",dst="source-value-20"} 1 + +# label_replace does not overwrite the destination label if the source label +# does not exist. +# Unsupported by streaming engine. 
+# eval instant at 0m label_replace(testmetric, "dst", "value-$1", "nonexistent-src", "source-value-(.*)") +# testmetric{src="source-value-10",dst="original-destination-value"} 0 +# testmetric{src="source-value-20",dst="original-destination-value"} 1 + +# label_replace overwrites the destination label if the source label is empty, +# but matched. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "value-$1", "nonexistent-src", "(.*)") +# testmetric{src="source-value-10",dst="value-"} 0 +# testmetric{src="source-value-20",dst="value-"} 1 + +# label_replace does not overwrite the destination label if the source label +# is not matched. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "value-$1", "src", "non-matching-regex") +# testmetric{src="source-value-10",dst="original-destination-value"} 0 +# testmetric{src="source-value-20",dst="original-destination-value"} 1 + +# Unsupported by streaming engine. +# eval instant at 0m label_replace((((testmetric))), (("dst")), (("value-$1")), (("src")), (("non-matching-regex"))) +# testmetric{src="source-value-10",dst="original-destination-value"} 0 +# testmetric{src="source-value-20",dst="original-destination-value"} 1 + +# label_replace drops labels that are set to empty values. +# Unsupported by streaming engine. +# eval instant at 0m label_replace(testmetric, "dst", "", "dst", ".*") +# testmetric{src="source-value-10"} 0 +# testmetric{src="source-value-20"} 1 + +# label_replace fails when the regex is invalid. +# Unsupported by streaming engine. +# eval_fail instant at 0m label_replace(testmetric, "dst", "value-$1", "src", "(.*") + +# label_replace fails when the destination label name is not a valid Prometheus label name. +# Unsupported by streaming engine. +# eval_fail instant at 0m label_replace(testmetric, "invalid-label-name", "", "src", "(.*)") + +# label_replace fails when there would be duplicated identical output label sets. +# Unsupported by streaming engine. +# eval_fail instant at 0m label_replace(testmetric, "src", "", "", "") + +clear + +# Tests for vector, time and timestamp. +load 10s + metric 1 1 + +# Unsupported by streaming engine. +# eval instant at 0s timestamp(metric) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5s timestamp(metric) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5s timestamp(((metric))) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 10s timestamp(metric) +# {} 10 + +# Unsupported by streaming engine. +# eval instant at 10s timestamp(((metric))) +# {} 10 + +# Tests for label_join. +load 5m + testmetric{src="a",src1="b",src2="c",dst="original-destination-value"} 0 + testmetric{src="d",src1="e",src2="f",dst="original-destination-value"} 1 + +# label_join joins all src values in order. +# Unsupported by streaming engine. +# eval instant at 0m label_join(testmetric, "dst", "-", "src", "src1", "src2") +# testmetric{src="a",src1="b",src2="c",dst="a-b-c"} 0 +# testmetric{src="d",src1="e",src2="f",dst="d-e-f"} 1 + +# label_join treats non existent src labels as empty strings. +# Unsupported by streaming engine. +# eval instant at 0m label_join(testmetric, "dst", "-", "src", "src3", "src1") +# testmetric{src="a",src1="b",src2="c",dst="a--b"} 0 +# testmetric{src="d",src1="e",src2="f",dst="d--e"} 1 + +# label_join overwrites the destination label even if the resulting dst label is empty string +# Unsupported by streaming engine. 
+# eval instant at 0m label_join(testmetric, "dst", "", "emptysrc", "emptysrc1", "emptysrc2") +# testmetric{src="a",src1="b",src2="c"} 0 +# testmetric{src="d",src1="e",src2="f"} 1 + +# test without src label for label_join +# Unsupported by streaming engine. +# eval instant at 0m label_join(testmetric, "dst", ", ") +# testmetric{src="a",src1="b",src2="c"} 0 +# testmetric{src="d",src1="e",src2="f"} 1 + +# test without dst label for label_join +load 5m + testmetric1{src="foo",src1="bar",src2="foobar"} 0 + testmetric1{src="fizz",src1="buzz",src2="fizzbuzz"} 1 + +# label_join creates dst label if not present. +# Unsupported by streaming engine. +# eval instant at 0m label_join(testmetric1, "dst", ", ", "src", "src1", "src2") +# testmetric1{src="foo",src1="bar",src2="foobar",dst="foo, bar, foobar"} 0 +# testmetric1{src="fizz",src1="buzz",src2="fizzbuzz",dst="fizz, buzz, fizzbuzz"} 1 + +clear + +# Tests for vector. +# Unsupported by streaming engine. +# eval instant at 0m vector(1) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0s vector(time()) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5s vector(time()) +# {} 5 + +# Unsupported by streaming engine. +# eval instant at 60m vector(time()) +# {} 3600 + + +# Tests for clamp_max, clamp_min(), and clamp(). +load 5m + test_clamp{src="clamp-a"} -50 + test_clamp{src="clamp-b"} 0 + test_clamp{src="clamp-c"} 100 + +# Unsupported by streaming engine. +# eval instant at 0m clamp_max(test_clamp, 75) +# {src="clamp-a"} -50 +# {src="clamp-b"} 0 +# {src="clamp-c"} 75 + +# Unsupported by streaming engine. +# eval instant at 0m clamp_min(test_clamp, -25) +# {src="clamp-a"} -25 +# {src="clamp-b"} 0 +# {src="clamp-c"} 100 + +# Unsupported by streaming engine. +# eval instant at 0m clamp(test_clamp, -25, 75) +# {src="clamp-a"} -25 +# {src="clamp-b"} 0 +# {src="clamp-c"} 75 + +# Unsupported by streaming engine. +# eval instant at 0m clamp_max(clamp_min(test_clamp, -20), 70) +# {src="clamp-a"} -20 +# {src="clamp-b"} 0 +# {src="clamp-c"} 70 + +# Unsupported by streaming engine. +# eval instant at 0m clamp_max((clamp_min(test_clamp, (-20))), (70)) +# {src="clamp-a"} -20 +# {src="clamp-b"} 0 +# {src="clamp-c"} 70 + +# Unsupported by streaming engine. +# eval instant at 0m clamp(test_clamp, 0, NaN) +# {src="clamp-a"} NaN +# {src="clamp-b"} NaN +# {src="clamp-c"} NaN + +# Unsupported by streaming engine. +# eval instant at 0m clamp(test_clamp, NaN, 0) +# {src="clamp-a"} NaN +# {src="clamp-b"} NaN +# {src="clamp-c"} NaN + +# Unsupported by streaming engine. +# eval instant at 0m clamp(test_clamp, 5, -5) + +# Test cases for sgn. +clear +load 5m + test_sgn{src="sgn-a"} -Inf + test_sgn{src="sgn-b"} Inf + test_sgn{src="sgn-c"} NaN + test_sgn{src="sgn-d"} -50 + test_sgn{src="sgn-e"} 0 + test_sgn{src="sgn-f"} 100 + +# Unsupported by streaming engine. +# eval instant at 0m sgn(test_sgn) +# {src="sgn-a"} -1 +# {src="sgn-b"} 1 +# {src="sgn-c"} NaN +# {src="sgn-d"} -1 +# {src="sgn-e"} 0 +# {src="sgn-f"} 1 + + +# Tests for sort/sort_desc. 
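+# NaN compares as neither less than nor greater than any number, so both
+# sort() and sort_desc() are expected to place the NaN series at the end of
+# the result, as the orderings below show.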
+clear +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort(http_requests) +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="2", job="api-server"} NaN + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_desc(http_requests) +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="canary", instance="2", job="api-server"} NaN + +# Tests for sort_by_label/sort_by_label_desc. 
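+# Label values are compared using a natural (number-aware) ordering rather
+# than a plain lexicographic one: note the expected cpu order
+# 0, 1, 2, 3, 10, ..., 100 and release order 1.2.3 < 1.11.3 < 1.111.3 below,
+# where lexicographic ordering would give 0, 1, 10, 100, 11, ...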
+clear +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + http_requests{job="api-server", instance="2", group="production"} 0+10x10 + cpu_time_total{job="cpu", cpu="0"} 0+10x10 + cpu_time_total{job="cpu", cpu="1"} 0+10x10 + cpu_time_total{job="cpu", cpu="2"} 0+10x10 + cpu_time_total{job="cpu", cpu="3"} 0+10x10 + cpu_time_total{job="cpu", cpu="10"} 0+10x10 + cpu_time_total{job="cpu", cpu="11"} 0+10x10 + cpu_time_total{job="cpu", cpu="12"} 0+10x10 + cpu_time_total{job="cpu", cpu="20"} 0+10x10 + cpu_time_total{job="cpu", cpu="21"} 0+10x10 + cpu_time_total{job="cpu", cpu="100"} 0+10x10 + node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 0+10x10 + node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 0+10x10 + node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 0+10x10 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(http_requests, "instance") +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="2", job="api-server"} 100 +# http_requests{group="canary", instance="2", job="api-server"} NaN + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(http_requests, "instance", "group") +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="2", job="api-server"} 100 + +# Unsupported by streaming engine. 
+# eval_ordered instant at 50m sort_by_label(http_requests, "instance", "group") +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="2", job="api-server"} 100 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(http_requests, "group", "instance", "job") +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="production", instance="2", job="api-server"} 100 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(http_requests, "job", "instance", "group") +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="2", job="api-server"} 100 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="1", job="app-server"} 600 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance") +# http_requests{group="production", instance="2", job="api-server"} 100 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="production", instance="0", job="api-server"} 100 + +# Unsupported by streaming engine. 
+# eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group") +# http_requests{group="production", instance="2", job="api-server"} 100 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="0", job="api-server"} 300 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group", "job") +# http_requests{group="production", instance="2", job="api-server"} 100 +# http_requests{group="canary", instance="2", job="api-server"} NaN +# http_requests{group="production", instance="1", job="app-server"} 600 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="0", job="api-server"} 300 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(cpu_time_total, "cpu") +# cpu_time_total{job="cpu", cpu="0"} 100 +# cpu_time_total{job="cpu", cpu="1"} 100 +# cpu_time_total{job="cpu", cpu="2"} 100 +# cpu_time_total{job="cpu", cpu="3"} 100 +# cpu_time_total{job="cpu", cpu="10"} 100 +# cpu_time_total{job="cpu", cpu="11"} 100 +# cpu_time_total{job="cpu", cpu="12"} 100 +# cpu_time_total{job="cpu", cpu="20"} 100 +# cpu_time_total{job="cpu", cpu="21"} 100 +# cpu_time_total{job="cpu", cpu="100"} 100 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(node_uname_info, "instance") +# node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100 +# node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 100 +# node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100 + +# Unsupported by streaming engine. +# eval_ordered instant at 50m sort_by_label(node_uname_info, "release") +# node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 100 +# node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100 +# node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100 + +# Tests for holt_winters +clear + +# positive trends +load 10s + http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000 + http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000 + http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000 + http_requests{job="api-server", instance="1", group="canary"} 0+40x2000 + +# Unsupported by streaming engine. 
+# eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +# {job="api-server", instance="0", group="production"} 8000 +# {job="api-server", instance="1", group="production"} 16000 +# {job="api-server", instance="0", group="canary"} 24000 +# {job="api-server", instance="1", group="canary"} 32000 + +# negative trends +clear +load 10s + http_requests{job="api-server", instance="0", group="production"} 8000-10x1000 + http_requests{job="api-server", instance="1", group="production"} 0-20x1000 + http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300-80x1000 + http_requests{job="api-server", instance="1", group="canary"} 0-40x1000 0+40x1000 + +# Unsupported by streaming engine. +# eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +# {job="api-server", instance="0", group="production"} 0 +# {job="api-server", instance="1", group="production"} -16000 +# {job="api-server", instance="0", group="canary"} 24000 +# {job="api-server", instance="1", group="canary"} -32000 + +# Tests for avg_over_time +clear +load 10s + metric 1 2 3 4 5 + metric2 1 2 3 4 Inf + metric3 1 2 3 4 -Inf + metric4 1 2 3 Inf -Inf + metric5 Inf 0 Inf + metric5b Inf 0 Inf + metric5c Inf Inf Inf -Inf + metric6 1 2 3 -Inf -Inf + metric6b -Inf 0 -Inf + metric6c -Inf -Inf -Inf Inf + metric7 1 2 -Inf -Inf Inf + metric8 9.988465674311579e+307 9.988465674311579e+307 + metric9 -9.988465674311579e+307 -9.988465674311579e+307 -9.988465674311579e+307 + metric10 -9.988465674311579e+307 9.988465674311579e+307 + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric[1m]) +# {} 3 + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric[1m])/count_over_time(metric[1m]) +# {} 3 + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric2[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric2[1m])/count_over_time(metric2[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric3[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric3[1m])/count_over_time(metric3[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric4[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric4[1m])/count_over_time(metric4[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric5[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric5[1m])/count_over_time(metric5[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric5b[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric5b[1m])/count_over_time(metric5b[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric5c[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric5c[1m])/count_over_time(metric5c[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric6[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric6[1m])/count_over_time(metric6[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric6b[1m]) +# {} -Inf + +# Unsupported by streaming engine. 
+# eval instant at 1m sum_over_time(metric6b[1m])/count_over_time(metric6b[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric6c[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric6c[1m])/count_over_time(metric6c[1m]) +# {} NaN + + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric7[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric7[1m])/count_over_time(metric7[1m]) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric8[1m]) +# {} 9.988465674311579e+307 + +# This overflows float64. +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric8[1m])/count_over_time(metric8[1m]) +# {} Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric9[1m]) +# {} -9.988465674311579e+307 + +# This overflows float64. +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric9[1m])/count_over_time(metric9[1m]) +# {} -Inf + +# Unsupported by streaming engine. +# eval instant at 1m avg_over_time(metric10[1m]) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 1m sum_over_time(metric10[1m])/count_over_time(metric10[1m]) +# {} 0 + +# Tests for stddev_over_time and stdvar_over_time. +clear +load 10s + metric 0 8 8 2 3 + +# Unsupported by streaming engine. +# eval instant at 1m stdvar_over_time(metric[1m]) +# {} 10.56 + +# Unsupported by streaming engine. +# eval instant at 1m stddev_over_time(metric[1m]) +# {} 3.249615 + +# Unsupported by streaming engine. +# eval instant at 1m stddev_over_time((metric[1m])) +# {} 3.249615 + +# Tests for stddev_over_time and stdvar_over_time #4927. +clear +load 10s + metric 1.5990505637277868 1.5990505637277868 1.5990505637277868 + +# Unsupported by streaming engine. +# eval instant at 1m stdvar_over_time(metric[1m]) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 1m stddev_over_time(metric[1m]) +# {} 0 + +# Tests for mad_over_time. +clear +load 10s + metric 4 6 2 1 999 1 2 + +# Unsupported by streaming engine. +# eval instant at 70s mad_over_time(metric[70s]) +# {} 1 + +# Tests for quantile_over_time +clear + +load 10s + data{test="two samples"} 0 1 + data{test="three samples"} 0 1 2 + data{test="uneven samples"} 0 1 4 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(0, data[1m]) +# {test="two samples"} 0 +# {test="three samples"} 0 +# {test="uneven samples"} 0 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(0.5, data[1m]) +# {test="two samples"} 0.5 +# {test="three samples"} 1 +# {test="uneven samples"} 1 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(0.75, data[1m]) +# {test="two samples"} 0.75 +# {test="three samples"} 1.5 +# {test="uneven samples"} 2.5 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(0.8, data[1m]) +# {test="two samples"} 0.8 +# {test="three samples"} 1.6 +# {test="uneven samples"} 2.8 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(1, data[1m]) +# {test="two samples"} 1 +# {test="three samples"} 2 +# {test="uneven samples"} 4 + +# Unsupported by streaming engine. +# eval instant at 1m quantile_over_time(-1, data[1m]) +# {test="two samples"} -Inf +# {test="three samples"} -Inf +# {test="uneven samples"} -Inf + +# Unsupported by streaming engine. 
+# eval instant at 1m quantile_over_time(2, data[1m]) +# {test="two samples"} +Inf +# {test="three samples"} +Inf +# {test="uneven samples"} +Inf + +# Unsupported by streaming engine. +# eval instant at 1m (quantile_over_time(2, (data[1m]))) +# {test="two samples"} +Inf +# {test="three samples"} +Inf +# {test="uneven samples"} +Inf + +clear + +# Test time-related functions. +# Unsupported by streaming engine. +# eval instant at 0m year() +# {} 1970 + +# Unsupported by streaming engine. +# eval instant at 1ms time() +# 0.001 + +# Unsupported by streaming engine. +# eval instant at 50m time() +# 3000 + +# Unsupported by streaming engine. +# eval instant at 0m year(vector(1136239445)) +# {} 2006 + +# Unsupported by streaming engine. +# eval instant at 0m month() +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m month(vector(1136239445)) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m day_of_month() +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m day_of_month(vector(1136239445)) +# {} 2 + +# Unsupported by streaming engine. +# eval instant at 0m day_of_year() +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m day_of_year(vector(1136239445)) +# {} 2 + +# Thursday. +# Unsupported by streaming engine. +# eval instant at 0m day_of_week() +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 0m day_of_week(vector(1136239445)) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 0m hour() +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 0m hour(vector(1136239445)) +# {} 22 + +# Unsupported by streaming engine. +# eval instant at 0m minute() +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 0m minute(vector(1136239445)) +# {} 4 + +# 2008-12-31 23:59:59 just before leap second. +# Unsupported by streaming engine. +# eval instant at 0m year(vector(1230767999)) +# {} 2008 + +# 2009-01-01 00:00:00 just after leap second. +# Unsupported by streaming engine. +# eval instant at 0m year(vector(1230768000)) +# {} 2009 + +# 2016-02-29 23:59:59 February 29th in leap year. +# Unsupported by streaming engine. +# eval instant at 0m month(vector(1456790399)) + day_of_month(vector(1456790399)) / 100 +# {} 2.29 + +# 2016-03-01 00:00:00 March 1st in leap year. +# Unsupported by streaming engine. +# eval instant at 0m month(vector(1456790400)) + day_of_month(vector(1456790400)) / 100 +# {} 3.01 + +# 2016-12-31 13:37:00 366th day in leap year. +# Unsupported by streaming engine. +# eval instant at 0m day_of_year(vector(1483191420)) +# {} 366 + +# 2022-12-31 13:37:00 365th day in non-leap year. +# Unsupported by streaming engine. +# eval instant at 0m day_of_year(vector(1672493820)) +# {} 365 + +# February 1st 2016 in leap year. +# Unsupported by streaming engine. +# eval instant at 0m days_in_month(vector(1454284800)) +# {} 29 + +# February 1st 2017 not in leap year. +# Unsupported by streaming engine. +# eval instant at 0m days_in_month(vector(1485907200)) +# {} 28 + +clear + +# Test duplicate labelset in promql output. +load 5m + testmetric1{src="a",dst="b"} 0 + testmetric2{src="a",dst="b"} 1 + +# Unsupported by streaming engine. +# eval_fail instant at 0m changes({__name__=~'testmetric1|testmetric2'}[5m]) + +# Tests for *_over_time +clear + +load 10s + data{type="numbers"} 2 0 3 + data{type="some_nan"} 2 0 NaN + data{type="some_nan2"} 2 NaN 1 + data{type="some_nan3"} NaN 0 1 + data{type="only_nan"} NaN NaN NaN + +# Unsupported by streaming engine. 
+# eval instant at 1m min_over_time(data[1m]) +# {type="numbers"} 0 +# {type="some_nan"} 0 +# {type="some_nan2"} 1 +# {type="some_nan3"} 0 +# {type="only_nan"} NaN + +# Unsupported by streaming engine. +# eval instant at 1m max_over_time(data[1m]) +# {type="numbers"} 3 +# {type="some_nan"} 2 +# {type="some_nan2"} 2 +# {type="some_nan3"} 1 +# {type="only_nan"} NaN + +# Unsupported by streaming engine. +# eval instant at 1m last_over_time(data[1m]) +# data{type="numbers"} 3 +# data{type="some_nan"} NaN +# data{type="some_nan2"} 1 +# data{type="some_nan3"} 1 +# data{type="only_nan"} NaN + +clear + +# Test for absent() +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistent) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistent{job="testjob", instance="testinstance", method=~".x"}) +# {instance="testinstance", job="testjob"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistent{job="testjob",job="testjob2",foo="bar"}) +# {foo="bar"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistent{job="testjob",job="testjob2",job="three",foo="bar"}) +# {foo="bar"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistent{job="testjob",job=~"testjob2",foo="bar"}) +# {foo="bar"} 1 + +clear + +# Don't return anything when there's something there. +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + +# Unsupported by streaming engine. +# eval instant at 50m absent(http_requests) + +# Unsupported by streaming engine. +# eval instant at 50m absent(sum(http_requests)) + +clear + +# Unsupported by streaming engine. +# eval instant at 50m absent(sum(nonexistent{job="testjob", instance="testinstance"})) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(max(nonexistant)) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(nonexistant > 1) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(a + b) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(a and b) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 50m absent(rate(nonexistant[5m])) +# {} 1 + +clear + +# Testdata for absent_over_time() +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(http_requests[5m]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(http_requests{handler="/foo"}[5m]) +# {handler="/foo"} 1 + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(http_requests{handler!="/foo"}[5m]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(http_requests{handler="/foo", handler="/bar", handler="/foobar"}[5m]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(rate(nonexistant[5m])[5m:]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(http_requests{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) +# {instance="127.0.0.1"} 1 + +load 1m + http_requests{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 + httpd_handshake_failures_total{instance="127.0.0.1",job="node"} 1+1x15 + httpd_log_lines_total{instance="127.0.0.1",job="node"} 1 + ssl_certificate_expiry_seconds{job="ingress"} NaN NaN NaN NaN NaN + +# Unsupported by streaming engine. 
+# eval instant at 5m absent_over_time(http_requests[5m]) + +# Unsupported by streaming engine. +# eval instant at 5m absent_over_time(rate(http_requests[5m])[5m:1m]) + +# Unsupported by streaming engine. +# eval instant at 0m absent_over_time(httpd_log_lines_total[30s]) + +# Unsupported by streaming engine. +# eval instant at 1m absent_over_time(httpd_log_lines_total[30s]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 15m absent_over_time(http_requests[5m]) + +# Unsupported by streaming engine. +# eval instant at 16m absent_over_time(http_requests[5m]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 16m absent_over_time(http_requests[6m]) + +# Unsupported by streaming engine. +# eval instant at 16m absent_over_time(httpd_handshake_failures_total[1m]) + +# Unsupported by streaming engine. +# eval instant at 16m absent_over_time({instance="127.0.0.1"}[5m]) + +# Unsupported by streaming engine. +# eval instant at 21m absent_over_time({instance="127.0.0.1"}[5m]) +# {instance="127.0.0.1"} 1 + +# Unsupported by streaming engine. +# eval instant at 21m absent_over_time({instance="127.0.0.1"}[20m]) + +# Unsupported by streaming engine. +# eval instant at 21m absent_over_time({job="grok"}[20m]) +# {job="grok"} 1 + +# Unsupported by streaming engine. +# eval instant at 30m absent_over_time({instance="127.0.0.1"}[5m:5s]) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 5m absent_over_time({job="ingress"}[4m]) + +# Unsupported by streaming engine. +# eval instant at 10m absent_over_time({job="ingress"}[4m]) +# {job="ingress"} 1 + +clear + +# Testdata for present_over_time() +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(http_requests[5m]) + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(http_requests{handler="/foo"}[5m]) + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(http_requests{handler!="/foo"}[5m]) + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(http_requests{handler="/foo", handler="/bar", handler="/foobar"}[5m]) + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(rate(nonexistant[5m])[5m:]) + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(http_requests{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) + +load 1m + http_requests{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 + httpd_handshake_failures_total{instance="127.0.0.1",job="node"} 1+1x15 + httpd_log_lines_total{instance="127.0.0.1",job="node"} 1 + ssl_certificate_expiry_seconds{job="ingress"} NaN NaN NaN NaN NaN + +# Unsupported by streaming engine. +# eval instant at 5m present_over_time(http_requests[5m]) +# {instance="127.0.0.1", job="httpd", path="/bar"} 1 +# {instance="127.0.0.1", job="httpd", path="/foo"} 1 + +# Unsupported by streaming engine. +# eval instant at 5m present_over_time(rate(http_requests[5m])[5m:1m]) +# {instance="127.0.0.1", job="httpd", path="/bar"} 1 +# {instance="127.0.0.1", job="httpd", path="/foo"} 1 + +# Unsupported by streaming engine. +# eval instant at 0m present_over_time(httpd_log_lines_total[30s]) +# {instance="127.0.0.1",job="node"} 1 + +# Unsupported by streaming engine. +# eval instant at 1m present_over_time(httpd_log_lines_total[30s]) + +# Unsupported by streaming engine. 
+# eval instant at 15m present_over_time(http_requests[5m])
+# {instance="127.0.0.1", job="httpd", path="/bar"} 1
+# {instance="127.0.0.1", job="httpd", path="/foo"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 16m present_over_time(http_requests[5m])
+
+# Unsupported by streaming engine.
+# eval instant at 16m present_over_time(http_requests[6m])
+# {instance="127.0.0.1", job="httpd", path="/bar"} 1
+# {instance="127.0.0.1", job="httpd", path="/foo"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 16m present_over_time(httpd_handshake_failures_total[1m])
+# {instance="127.0.0.1", job="node"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 16m present_over_time({instance="127.0.0.1"}[5m])
+# {instance="127.0.0.1",job="node"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 21m present_over_time({job="grok"}[20m])
+
+# Unsupported by streaming engine.
+# eval instant at 30m present_over_time({instance="127.0.0.1"}[5m:5s])
+
+# Unsupported by streaming engine.
+# eval instant at 5m present_over_time({job="ingress"}[4m])
+# {job="ingress"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 10m present_over_time({job="ingress"}[4m])
+
+clear
+
+# Testing exp() sqrt() log2() log10() ln()
+load 5m
+  exp_root_log{l="x"} 10
+  exp_root_log{l="y"} 20
+
+# Unsupported by streaming engine.
+# eval instant at 5m exp(exp_root_log)
+# {l="x"} 22026.465794806718
+# {l="y"} 485165195.4097903
+
+# Unsupported by streaming engine.
+# eval instant at 5m exp(exp_root_log - 10)
+# {l="y"} 22026.465794806718
+# {l="x"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 5m exp(exp_root_log - 20)
+# {l="x"} 4.5399929762484854e-05
+# {l="y"} 1
+
+# Unsupported by streaming engine.
+# eval instant at 5m ln(exp_root_log)
+# {l="x"} 2.302585092994046
+# {l="y"} 2.995732273553991
+
+# Unsupported by streaming engine.
+# eval instant at 5m ln(exp_root_log - 10)
+# {l="y"} 2.302585092994046
+# {l="x"} -Inf
+
+# Unsupported by streaming engine.
+# eval instant at 5m ln(exp_root_log - 20)
+# {l="y"} -Inf
+# {l="x"} NaN
+
+# Unsupported by streaming engine.
+# eval instant at 5m exp(ln(exp_root_log))
+# {l="y"} 20
+# {l="x"} 10
+
+# Unsupported by streaming engine.
+# eval instant at 5m sqrt(exp_root_log)
+# {l="x"} 3.1622776601683795
+# {l="y"} 4.47213595499958
+
+# Unsupported by streaming engine.
+# eval instant at 5m log2(exp_root_log)
+# {l="x"} 3.3219280948873626
+# {l="y"} 4.321928094887363
+
+# Unsupported by streaming engine.
+# eval instant at 5m log2(exp_root_log - 10)
+# {l="y"} 3.3219280948873626
+# {l="x"} -Inf
+
+# Unsupported by streaming engine.
+# eval instant at 5m log2(exp_root_log - 20)
+# {l="x"} NaN
+# {l="y"} -Inf
+
+# Unsupported by streaming engine.
+# eval instant at 5m log10(exp_root_log)
+# {l="x"} 1
+# {l="y"} 1.301029995663981
+
+# Unsupported by streaming engine.
+# eval instant at 5m log10(exp_root_log - 10)
+# {l="y"} 1
+# {l="x"} -Inf
+
+# Unsupported by streaming engine.
+# eval instant at 5m log10(exp_root_log - 20)
+# {l="x"} NaN
+# {l="y"} -Inf
+
+clear
diff --git a/pkg/streamingpromql/testdata/upstream/histograms.test.disabled b/pkg/streamingpromql/testdata/upstream/histograms.test.disabled
new file mode 100644
index 0000000000..666804fb61
--- /dev/null
+++ b/pkg/streamingpromql/testdata/upstream/histograms.test.disabled
@@ -0,0 +1,239 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/histograms.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+# Two histograms with 4 buckets each (x_sum and x_count not included,
+# only buckets). Lowest bucket for one histogram < 0, for the other >
+# 0. They have the same name, just separated by label. Not useful in
+# practice, but can happen (if clients change bucketing), and the
+# server has to cope with it.
+
+# Test histogram.
+load 5m
+  testhistogram_bucket{le="0.1", start="positive"} 0+5x10
+  testhistogram_bucket{le=".2", start="positive"} 0+7x10
+  testhistogram_bucket{le="1e0", start="positive"} 0+11x10
+  testhistogram_bucket{le="+Inf", start="positive"} 0+12x10
+  testhistogram_bucket{le="-.2", start="negative"} 0+1x10
+  testhistogram_bucket{le="-0.1", start="negative"} 0+2x10
+  testhistogram_bucket{le="0.3", start="negative"} 0+2x10
+  testhistogram_bucket{le="+Inf", start="negative"} 0+3x10
+
+# Another test histogram, where q(1/6), q(1/2), and q(5/6) are each in
+# the middle of a bucket and should therefore be 1, 3, and 5,
+# respectively.
+load 5m
+  testhistogram2_bucket{le="0"} 0+0x10
+  testhistogram2_bucket{le="2"} 0+1x10
+  testhistogram2_bucket{le="4"} 0+2x10
+  testhistogram2_bucket{le="6"} 0+3x10
+  testhistogram2_bucket{le="+Inf"} 0+3x10
+
+# Now a more realistic histogram per job and instance to test aggregation.
+load 5m
+  request_duration_seconds_bucket{job="job1", instance="ins1", le="0.1"} 0+1x10
+  request_duration_seconds_bucket{job="job1", instance="ins1", le="0.2"} 0+3x10
+  request_duration_seconds_bucket{job="job1", instance="ins1", le="+Inf"} 0+4x10
+  request_duration_seconds_bucket{job="job1", instance="ins2", le="0.1"} 0+2x10
+  request_duration_seconds_bucket{job="job1", instance="ins2", le="0.2"} 0+5x10
+  request_duration_seconds_bucket{job="job1", instance="ins2", le="+Inf"} 0+6x10
+  request_duration_seconds_bucket{job="job2", instance="ins1", le="0.1"} 0+3x10
+  request_duration_seconds_bucket{job="job2", instance="ins1", le="0.2"} 0+4x10
+  request_duration_seconds_bucket{job="job2", instance="ins1", le="+Inf"} 0+6x10
+  request_duration_seconds_bucket{job="job2", instance="ins2", le="0.1"} 0+4x10
+  request_duration_seconds_bucket{job="job2", instance="ins2", le="0.2"} 0+7x10
+  request_duration_seconds_bucket{job="job2", instance="ins2", le="+Inf"} 0+9x10
+
+# Different le representations in one histogram.
+load 5m
+  mixed_bucket{job="job1", instance="ins1", le="0.1"} 0+1x10
+  mixed_bucket{job="job1", instance="ins1", le="0.2"} 0+1x10
+  mixed_bucket{job="job1", instance="ins1", le="2e-1"} 0+1x10
+  mixed_bucket{job="job1", instance="ins1", le="2.0e-1"} 0+1x10
+  mixed_bucket{job="job1", instance="ins1", le="+Inf"} 0+4x10
+  mixed_bucket{job="job1", instance="ins2", le="+inf"} 0+0x10
+  mixed_bucket{job="job1", instance="ins2", le="+Inf"} 0+0x10
+
+# Quantile too low.
+eval instant at 50m histogram_quantile(-0.1, testhistogram_bucket)
+  {start="positive"} -Inf
+  {start="negative"} -Inf
+
+# Quantile too high.
+eval instant at 50m histogram_quantile(1.01, testhistogram_bucket)
+  {start="positive"} +Inf
+  {start="negative"} +Inf
+
+# Quantile invalid.
+eval instant at 50m histogram_quantile(NaN, testhistogram_bucket)
+  {start="positive"} NaN
+  {start="negative"} NaN
+
+# Quantile value in lowest bucket, which is positive.
+eval instant at 50m histogram_quantile(0, testhistogram_bucket{start="positive"})
+  {start="positive"} 0
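+
+# As a sanity check on the interpolation asserted below (assuming the usual
+# classic-histogram estimation): at 50m the start="positive" series counts 120
+# observations in total, so q(0.2) has rank 0.2 * 120 = 24. That rank falls in
+# the first bucket (0, 0.1], which holds 50 observations, so the interpolated
+# estimate is 0.1 * 24 / 50 = 0.048, matching the "useful quantiles" results.
+
+# Quantile value in lowest bucket, which is negative.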
+eval instant at 50m histogram_quantile(0, testhistogram_bucket{start="negative"}) + {start="negative"} -0.2 + +# Quantile value in highest bucket. +eval instant at 50m histogram_quantile(1, testhistogram_bucket) + {start="positive"} 1 + {start="negative"} 0.3 + +# Finally some useful quantiles. +eval instant at 50m histogram_quantile(0.2, testhistogram_bucket) + {start="positive"} 0.048 + {start="negative"} -0.2 + + +eval instant at 50m histogram_quantile(0.5, testhistogram_bucket) + {start="positive"} 0.15 + {start="negative"} -0.15 + +eval instant at 50m histogram_quantile(0.8, testhistogram_bucket) + {start="positive"} 0.72 + {start="negative"} 0.3 + +# More realistic with rates. +eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[5m])) + {start="positive"} 0.048 + {start="negative"} -0.2 + +eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[5m])) + {start="positive"} 0.15 + {start="negative"} -0.15 + +eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[5m])) + {start="positive"} 0.72 + {start="negative"} 0.3 + +# Want results exactly in the middle of the bucket. +eval instant at 7m histogram_quantile(1./6., testhistogram2_bucket) + {} 1 + +eval instant at 7m histogram_quantile(0.5, testhistogram2_bucket) + {} 3 + +eval instant at 7m histogram_quantile(5./6., testhistogram2_bucket) + {} 5 + +eval instant at 47m histogram_quantile(1./6., rate(testhistogram2_bucket[15m])) + {} 1 + +eval instant at 47m histogram_quantile(0.5, rate(testhistogram2_bucket[15m])) + {} 3 + +eval instant at 47m histogram_quantile(5./6., rate(testhistogram2_bucket[15m])) + {} 5 + +# Aggregated histogram: Everything in one. +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le)) + {} 0.075 + +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le)) + {} 0.1277777777777778 + +# Aggregated histogram: Everything in one. Now with avg, which does not change anything. +eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le)) + {} 0.075 + +eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le)) + {} 0.12777777777777778 + +# Aggregated histogram: By instance. +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) + {instance="ins1"} 0.075 + {instance="ins2"} 0.075 + +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) + {instance="ins1"} 0.1333333333 + {instance="ins2"} 0.125 + +# Aggregated histogram: By job. +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) + {job="job1"} 0.1 + {job="job2"} 0.0642857142857143 + +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) + {job="job1"} 0.14 + {job="job2"} 0.1125 + +# Aggregated histogram: By job and instance. 
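+# (Note that le stays in every by() clause here: histogram_quantile needs the
+# per-bucket series, so aggregating le away would discard the bucket
+# boundaries.)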
+eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
+  {instance="ins1", job="job1"} 0.11
+  {instance="ins2", job="job1"} 0.09
+  {instance="ins1", job="job2"} 0.06
+  {instance="ins2", job="job2"} 0.0675
+
+eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
+  {instance="ins1", job="job1"} 0.15
+  {instance="ins2", job="job1"} 0.1333333333333333
+  {instance="ins1", job="job2"} 0.1
+  {instance="ins2", job="job2"} 0.1166666666666667
+
+# The unaggregated histogram for comparison. Same result as the previous one.
+eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m]))
+  {instance="ins1", job="job1"} 0.11
+  {instance="ins2", job="job1"} 0.09
+  {instance="ins1", job="job2"} 0.06
+  {instance="ins2", job="job2"} 0.0675
+
+eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m]))
+  {instance="ins1", job="job1"} 0.15
+  {instance="ins2", job="job1"} 0.13333333333333333
+  {instance="ins1", job="job2"} 0.1
+  {instance="ins2", job="job2"} 0.11666666666666667
+
+# A histogram with nonmonotonic bucket counts. This may happen when recording
+# rule evaluation or federation races scrape ingestion, causing some bucket
+# counts to be derived from fewer samples.
+
+load 5m
+  nonmonotonic_bucket{le="0.1"} 0+2x10
+  nonmonotonic_bucket{le="1"} 0+1x10
+  nonmonotonic_bucket{le="10"} 0+5x10
+  nonmonotonic_bucket{le="100"} 0+4x10
+  nonmonotonic_bucket{le="1000"} 0+9x10
+  nonmonotonic_bucket{le="+Inf"} 0+8x10
+
+# Nonmonotonic buckets
+eval instant at 50m histogram_quantile(0.01, nonmonotonic_bucket)
+  {} 0.0045
+
+eval instant at 50m histogram_quantile(0.5, nonmonotonic_bucket)
+  {} 8.5
+
+eval instant at 50m histogram_quantile(0.99, nonmonotonic_bucket)
+  {} 979.75
+
+# Buckets with different representations of the same upper bound.
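+# (For ins1, le="0.2", le="2e-1" and le="2.0e-1" are distinct series that all
+# name the same numeric bound; these evals pin down how such duplicates are
+# handled. ins2 only has +Inf buckets with zero observations, hence NaN.)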
+eval instant at 50m histogram_quantile(0.5, rate(mixed_bucket[5m])) + {instance="ins1", job="job1"} 0.15 + {instance="ins2", job="job1"} NaN + +eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[5m])) + {instance="ins1", job="job1"} 0.2 + {instance="ins2", job="job1"} NaN + +eval instant at 50m histogram_quantile(1, rate(mixed_bucket[5m])) + {instance="ins1", job="job1"} 0.2 + {instance="ins2", job="job1"} NaN + +load 5m + empty_bucket{le="0.1", job="job1", instance="ins1"} 0x10 + empty_bucket{le="0.2", job="job1", instance="ins1"} 0x10 + empty_bucket{le="+Inf", job="job1", instance="ins1"} 0x10 + +eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[5m])) + {instance="ins1", job="job1"} NaN + +# Load a duplicate histogram with a different name to test failure scenario on multiple histograms with the same label set +# https://github.com/prometheus/prometheus/issues/9910 +load 5m + request_duration_seconds2_bucket{job="job1", instance="ins1", le="0.1"} 0+1x10 + request_duration_seconds2_bucket{job="job1", instance="ins1", le="0.2"} 0+3x10 + request_duration_seconds2_bucket{job="job1", instance="ins1", le="+Inf"} 0+4x10 + +eval_fail instant at 50m histogram_quantile(0.99, {__name__=~"request_duration.*"}) diff --git a/pkg/streamingpromql/testdata/upstream/literals.test.disabled b/pkg/streamingpromql/testdata/upstream/literals.test.disabled new file mode 100644 index 0000000000..1638b970f2 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/literals.test.disabled @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/literals.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +eval instant at 50m 12.34e6 + 12340000 + +eval instant at 50m 12.34e+6 + 12340000 + +eval instant at 50m 12.34e-6 + 0.00001234 + +eval instant at 50m 1+1 + 2 + +eval instant at 50m 1-1 + 0 + +eval instant at 50m 1 - -1 + 2 + +eval instant at 50m .2 + 0.2 + +eval instant at 50m +0.2 + 0.2 + +eval instant at 50m -0.2e-6 + -0.0000002 + +eval instant at 50m +Inf + +Inf + +eval instant at 50m inF + +Inf + +eval instant at 50m -inf + -Inf + +eval instant at 50m NaN + NaN + +eval instant at 50m nan + NaN + +eval instant at 50m 2. + 2 + +eval instant at 50m 1 / 0 + +Inf + +eval instant at 50m ((1) / (0)) + +Inf + +eval instant at 50m -1 / 0 + -Inf + +eval instant at 50m 0 / 0 + NaN + +eval instant at 50m 1 % 0 + NaN diff --git a/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled b/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled new file mode 100644 index 0000000000..392294edd3 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +# Minimal valid case: an empty histogram. 
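+# ({{}} is the test format's literal syntax for a native histogram sample,
+# here one containing no observations at all.)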
+load 5m + empty_histogram {{}} + +eval instant at 5m empty_histogram + {__name__="empty_histogram"} {{}} + +eval instant at 5m histogram_count(empty_histogram) + {} 0 + +eval instant at 5m histogram_sum(empty_histogram) + {} 0 + +eval instant at 5m histogram_avg(empty_histogram) + {} NaN + +eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) + {} NaN + +eval instant at 5m histogram_fraction(0, 8, empty_histogram) + {} NaN + + + +# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). +load 5m + single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} + +# histogram_count extracts the count property from the histogram. +eval instant at 5m histogram_count(single_histogram) + {} 4 + +# histogram_sum extracts the sum property from the histogram. +eval instant at 5m histogram_sum(single_histogram) + {} 5 + +# histogram_avg calculates the average from sum and count properties. +eval instant at 5m histogram_avg(single_histogram) + {} 1.25 + +# We expect half of the values to fall in the range 1 < x <= 2. +eval instant at 5m histogram_fraction(1, 2, single_histogram) + {} 0.5 + +# We expect all values to fall in the range 0 < x <= 8. +eval instant at 5m histogram_fraction(0, 8, single_histogram) + {} 1 + +# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. +eval instant at 5m histogram_quantile(0.5, single_histogram) + {} 1.5 + + + +# Repeat the same histogram 10 times. +load 5m + multi_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10 + +eval instant at 5m histogram_count(multi_histogram) + {} 4 + +eval instant at 5m histogram_sum(multi_histogram) + {} 5 + +eval instant at 5m histogram_avg(multi_histogram) + {} 1.25 + +eval instant at 5m histogram_fraction(1, 2, multi_histogram) + {} 0.5 + +eval instant at 5m histogram_quantile(0.5, multi_histogram) + {} 1.5 + + +# Each entry should look the same as the first. +eval instant at 50m histogram_count(multi_histogram) + {} 4 + +eval instant at 50m histogram_sum(multi_histogram) + {} 5 + +eval instant at 50m histogram_avg(multi_histogram) + {} 1.25 + +eval instant at 50m histogram_fraction(1, 2, multi_histogram) + {} 0.5 + +eval instant at 50m histogram_quantile(0.5, multi_histogram) + {} 1.5 + + + +# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket +# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket +# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. +load 5m + incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 + +eval instant at 5m histogram_count(incr_histogram) + {} 5 + +eval instant at 5m histogram_sum(incr_histogram) + {} 6 + +eval instant at 5m histogram_avg(incr_histogram) + {} 1.2 + +# We expect 3/5ths of the values to fall in the range 1 < x <= 2. 
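+# (After one increment the buckets are [1 3 1] for a count of 5; the offset:1
+# bucket spans (1, 2], so the expected fraction is 3/5 = 0.6.)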
+eval instant at 5m histogram_fraction(1, 2, incr_histogram) + {} 0.6 + +eval instant at 5m histogram_quantile(0.5, incr_histogram) + {} 1.5 + + +eval instant at 50m incr_histogram + {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} + +eval instant at 50m histogram_count(incr_histogram) + {} 14 + +eval instant at 50m histogram_sum(incr_histogram) + {} 24 + +eval instant at 50m histogram_avg(incr_histogram) + {} 1.7142857142857142 + +# We expect 12/14ths of the values to fall in the range 1 < x <= 2. +eval instant at 50m histogram_fraction(1, 2, incr_histogram) + {} 0.8571428571428571 + +eval instant at 50m histogram_quantile(0.5, incr_histogram) + {} 1.5 + +# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. +eval instant at 50m rate(incr_histogram[5m]) + {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} + +# Calculate the 50th percentile of observations over the last 10m. +eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) + {} 1.5 + + + +# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: +# 0: 1 2 4 8 16 32 64 (higher resolution) +# -1: 1 4 16 64 (lower resolution) +# +# Histograms can be merged as long as the histogram to the right is same resolution or higher. +load 5m + low_res_histogram {{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1 + +eval instant at 5m low_res_histogram + {__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}} + +eval instant at 5m histogram_count(low_res_histogram) + {} 5 + +eval instant at 5m histogram_sum(low_res_histogram) + {} 8 + +eval instant at 5m histogram_avg(low_res_histogram) + {} 1.6 + +# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4. +eval instant at 5m histogram_fraction(1, 4, low_res_histogram) + {} 1 + + + +# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range +# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. +load 5m + single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} + +eval instant at 5m histogram_count(single_zero_histogram) + {} 1 + +eval instant at 5m histogram_sum(single_zero_histogram) + {} 0.25 + +eval instant at 5m histogram_avg(single_zero_histogram) + {} 0.25 + +# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally +# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the +# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. +eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) + {} 1 + +# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. +eval instant at 5m histogram_quantile(0.5, single_zero_histogram) + {} 0 + + + +# Let's turn single_histogram upside-down. +load 5m + negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} + +eval instant at 5m histogram_count(negative_histogram) + {} 4 + +eval instant at 5m histogram_sum(negative_histogram) + {} -5 + +eval instant at 5m histogram_avg(negative_histogram) + {} -1.25 + +# We expect half of the values to fall in the range -2 < x <= -1. 
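+# (n_buckets mirror the positive buckets around zero, so the 2 of 4
+# observations that would sit in (1, 2] now sit in [-2, -1): 2/4 = 0.5.)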
+eval instant at 5m histogram_fraction(-2, -1, negative_histogram) + {} 0.5 + +eval instant at 5m histogram_quantile(0.5, negative_histogram) + {} -1.5 + + + +# Two histogram samples. +load 5m + two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} + +# We expect to see the newest sample. +eval instant at 10m histogram_count(two_samples_histogram) + {} 4 + +eval instant at 10m histogram_sum(two_samples_histogram) + {} -4 + +eval instant at 10m histogram_avg(two_samples_histogram) + {} -1 + +eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) + {} 0.5 + +eval instant at 10m histogram_quantile(0.5, two_samples_histogram) + {} -1.5 + + + +# Add two histograms with negated data. +load 5m + balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1 + +eval instant at 5m histogram_count(balanced_histogram) + {} 8 + +eval instant at 5m histogram_sum(balanced_histogram) + {} 0 + +eval instant at 5m histogram_avg(balanced_histogram) + {} 0 + +eval instant at 5m histogram_fraction(0, 4, balanced_histogram) + {} 0.5 + +# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of +# the first populated bucket after the span of empty buckets. +eval instant at 5m histogram_quantile(0.5, balanced_histogram) + {} 0.5 + +# Add histogram to test sum(last_over_time) regression +load 5m + incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 + incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10 + +eval instant at 50m histogram_sum(sum(incr_sum_histogram)) + {} 30 + +eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) + {} 30 diff --git a/pkg/streamingpromql/testdata/upstream/operators.test.disabled b/pkg/streamingpromql/testdata/upstream/operators.test.disabled new file mode 100644 index 0000000000..14bf0b103d --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/operators.test.disabled @@ -0,0 +1,494 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/operators.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + +load 5m + vector_matching_a{l="x"} 0+1x100 + vector_matching_a{l="y"} 0+2x50 + vector_matching_b{l="x"} 0+4x25 + + +eval instant at 50m SUM(http_requests) BY (job) - COUNT(http_requests) BY (job) + {job="api-server"} 996 + {job="app-server"} 2596 + +eval instant at 50m 2 - SUM(http_requests) BY (job) + {job="api-server"} -998 + {job="app-server"} -2598 + +eval instant at 50m -http_requests{job="api-server",instance="0",group="production"} + {job="api-server",instance="0",group="production"} -100 + +eval instant at 50m 
+http_requests{job="api-server",instance="0",group="production"} + http_requests{job="api-server",instance="0",group="production"} 100 + +eval instant at 50m - - - SUM(http_requests) BY (job) + {job="api-server"} -1000 + {job="app-server"} -2600 + +eval instant at 50m - - - 1 + -1 + +eval instant at 50m -2^---1*3 + -1.5 + +eval instant at 50m 2/-2^---1*3+2 + -10 + +eval instant at 50m -10^3 * - SUM(http_requests) BY (job) ^ -1 + {job="api-server"} 1 + {job="app-server"} 0.38461538461538464 + +eval instant at 50m 1000 / SUM(http_requests) BY (job) + {job="api-server"} 1 + {job="app-server"} 0.38461538461538464 + +eval instant at 50m SUM(http_requests) BY (job) - 2 + {job="api-server"} 998 + {job="app-server"} 2598 + +eval instant at 50m SUM(http_requests) BY (job) % 3 + {job="api-server"} 1 + {job="app-server"} 2 + +eval instant at 50m SUM(http_requests) BY (job) % 0.3 + {job="api-server"} 0.1 + {job="app-server"} 0.2 + +eval instant at 50m SUM(http_requests) BY (job) ^ 2 + {job="api-server"} 1000000 + {job="app-server"} 6760000 + +eval instant at 50m SUM(http_requests) BY (job) % 3 ^ 2 + {job="api-server"} 1 + {job="app-server"} 8 + +eval instant at 50m SUM(http_requests) BY (job) % 2 ^ (3 ^ 2) + {job="api-server"} 488 + {job="app-server"} 40 + +eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 + {job="api-server"} 488 + {job="app-server"} 40 + +eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 ^ 2 + {job="api-server"} 1000 + {job="app-server"} 2600 + +eval instant at 50m COUNT(http_requests) BY (job) ^ COUNT(http_requests) BY (job) + {job="api-server"} 256 + {job="app-server"} 256 + +eval instant at 50m SUM(http_requests) BY (job) / 0 + {job="api-server"} +Inf + {job="app-server"} +Inf + +eval instant at 50m http_requests{group="canary", instance="0", job="api-server"} / 0 + {group="canary", instance="0", job="api-server"} +Inf + +eval instant at 50m -1 * http_requests{group="canary", instance="0", job="api-server"} / 0 + {group="canary", instance="0", job="api-server"} -Inf + +eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} / 0 + {group="canary", instance="0", job="api-server"} NaN + +eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} % 0 + {group="canary", instance="0", job="api-server"} NaN + +eval instant at 50m SUM(http_requests) BY (job) + SUM(http_requests) BY (job) + {job="api-server"} 2000 + {job="app-server"} 5200 + +eval instant at 50m (SUM((http_requests)) BY (job)) + SUM(http_requests) BY (job) + {job="api-server"} 2000 + {job="app-server"} 5200 + +eval instant at 50m http_requests{job="api-server", group="canary"} + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="1", job="api-server"} 400 + +eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[5m]) * 5 * 60 + {group="canary", instance="0", job="api-server"} 330 + {group="canary", instance="1", job="api-server"} 440 + +eval instant at 50m rate(http_requests[25m]) * 25 * 60 + {group="canary", instance="0", job="api-server"} 150 + {group="canary", instance="0", job="app-server"} 350 + {group="canary", instance="1", job="api-server"} 200 + {group="canary", instance="1", job="app-server"} 400 + {group="production", instance="0", job="api-server"} 50 + {group="production", instance="0", job="app-server"} 249.99999999999997 + {group="production", instance="1", job="api-server"} 100 + {group="production", instance="1", 
job="app-server"} 300 + +eval instant at 50m (rate((http_requests[25m])) * 25) * 60 + {group="canary", instance="0", job="api-server"} 150 + {group="canary", instance="0", job="app-server"} 350 + {group="canary", instance="1", job="api-server"} 200 + {group="canary", instance="1", job="app-server"} 400 + {group="production", instance="0", job="api-server"} 50 + {group="production", instance="0", job="app-server"} 249.99999999999997 + {group="production", instance="1", job="api-server"} 100 + {group="production", instance="1", job="app-server"} 300 + + +eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"} + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="0", job="app-server"} 700 + +eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + +eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + +eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + +eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + +eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + +eval instant at 50m http_requests{group="canary"} or http_requests{group="production"} + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="1", job="app-server"} 600 + +# On overlap the rhs samples must be dropped. +eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"} + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + {group="canary", instance="1", job="api-server"} 401 + {group="canary", instance="1", job="app-server"} 801 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="1", job="app-server"} 600 + + +# Matching only on instance excludes everything that has instance=0/1 but includes +# entries without the instance label. 
+eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a) + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + {group="canary", instance="1", job="api-server"} 401 + {group="canary", instance="1", job="app-server"} 801 + vector_matching_a{l="x"} 10 + vector_matching_a{l="y"} 20 + +eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a) + {group="canary", instance="0", job="api-server"} 301 + {group="canary", instance="0", job="app-server"} 701 + {group="canary", instance="1", job="api-server"} 401 + {group="canary", instance="1", job="app-server"} 801 + vector_matching_a{l="x"} 10 + vector_matching_a{l="y"} 20 + +eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"} + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + +eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"} + +eval instant at 50m http_requests{group="canary"} unless on(job, instance) http_requests{instance="0"} + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + +eval instant at 50m http_requests{group="canary"} / on(instance,job) http_requests{group="production"} + {instance="0", job="api-server"} 3 + {instance="0", job="app-server"} 1.4 + {instance="1", job="api-server"} 2 + {instance="1", job="app-server"} 1.3333333333333333 + +eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"} + +eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"} + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + +eval instant at 50m http_requests{group="canary"} / ignoring(group) http_requests{group="production"} + {instance="0", job="api-server"} 3 + {instance="0", job="app-server"} 1.4 + {instance="1", job="api-server"} 2 + {instance="1", job="app-server"} 1.3333333333333333 + +# https://github.com/prometheus/prometheus/issues/1489 +eval instant at 50m http_requests AND ON (dummy) vector(1) + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="1", job="app-server"} 600 + +eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1) + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="api-server"} 400 + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="0", job="app-server"} 500 + http_requests{group="production", instance="1", job="api-server"} 200 + 
http_requests{group="production", instance="1", job="app-server"} 600 + + +# Comparisons. +eval instant at 50m SUM(http_requests) BY (job) > 1000 + {job="app-server"} 2600 + +eval instant at 50m 1000 < SUM(http_requests) BY (job) + {job="app-server"} 2600 + +eval instant at 50m SUM(http_requests) BY (job) <= 1000 + {job="api-server"} 1000 + +eval instant at 50m SUM(http_requests) BY (job) != 1000 + {job="app-server"} 2600 + +eval instant at 50m SUM(http_requests) BY (job) == 1000 + {job="api-server"} 1000 + +eval instant at 50m SUM(http_requests) BY (job) == bool 1000 + {job="api-server"} 1 + {job="app-server"} 0 + +eval instant at 50m SUM(http_requests) BY (job) == bool SUM(http_requests) BY (job) + {job="api-server"} 1 + {job="app-server"} 1 + +eval instant at 50m SUM(http_requests) BY (job) != bool SUM(http_requests) BY (job) + {job="api-server"} 0 + {job="app-server"} 0 + +eval instant at 50m 0 == bool 1 + 0 + +eval instant at 50m 1 == bool 1 + 1 + +eval instant at 50m http_requests{job="api-server", instance="0", group="production"} == bool 100 + {job="api-server", instance="0", group="production"} 1 + +# group_left/group_right. + +clear + +load 5m + node_var{instance="abc",job="node"} 2 + node_role{instance="abc",job="node",role="prometheus"} 1 + +load 5m + node_cpu{instance="abc",job="node",mode="idle"} 3 + node_cpu{instance="abc",job="node",mode="user"} 1 + node_cpu{instance="def",job="node",mode="idle"} 8 + node_cpu{instance="def",job="node",mode="user"} 2 + +load 5m + random{foo="bar"} 1 + +load 5m + threshold{instance="abc",job="node",target="a@b.com"} 0 + +# Copy machine role to node variable. +eval instant at 5m node_role * on (instance) group_right (role) node_var + {instance="abc",job="node",role="prometheus"} 2 + +eval instant at 5m node_var * on (instance) group_left (role) node_role + {instance="abc",job="node",role="prometheus"} 2 + +eval instant at 5m node_var * ignoring (role) group_left (role) node_role + {instance="abc",job="node",role="prometheus"} 2 + +eval instant at 5m node_role * ignoring (role) group_right (role) node_var + {instance="abc",job="node",role="prometheus"} 2 + +# Copy machine role to node variable with instrumentation labels. +eval instant at 5m node_cpu * ignoring (role, mode) group_left (role) node_role + {instance="abc",job="node",mode="idle",role="prometheus"} 3 + {instance="abc",job="node",mode="user",role="prometheus"} 1 + +eval instant at 5m node_cpu * on (instance) group_left (role) node_role + {instance="abc",job="node",mode="idle",role="prometheus"} 3 + {instance="abc",job="node",mode="user",role="prometheus"} 1 + + +# Ratio of total. 
+eval instant at 5m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 5m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) + {job="node",mode="idle"} 0.7857142857142857 + {job="node",mode="user"} 0.21428571428571427 + +eval instant at 5m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) + {} 1.0 + + +eval instant at 5m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 5m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 5m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) + {job="node",mode="idle"} 0.7857142857142857 + {job="node",mode="user"} 0.21428571428571427 + +eval instant at 5m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) + {} 1.0 + + +# Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here). +eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0 + {instance="abc",job="node",mode="idle",foo="bar"} 3 + {instance="abc",job="node",mode="user",foo="bar"} 1 + {instance="def",job="node",mode="idle",foo="bar"} 8 + {instance="def",job="node",mode="user",foo="bar"} 2 + + +# Use threshold from metric, and copy over target. +eval instant at 5m node_cpu > on(job, instance) group_left(target) threshold + node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 + node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 + +# Use threshold from metric, and a default (1) if it's not present. +eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) + node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 + node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 + node_cpu{instance="def",job="node",mode="idle"} 8 + node_cpu{instance="def",job="node",mode="user"} 2 + + +# Check that binops drop the metric name. 
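+# (The results below have no __name__ label: node_cpu + 2 yields bare
+# {instance=..., job=..., mode=...} series rather than node_cpu series.)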
+eval instant at 5m node_cpu + 2 + {instance="abc",job="node",mode="idle"} 5 + {instance="abc",job="node",mode="user"} 3 + {instance="def",job="node",mode="idle"} 10 + {instance="def",job="node",mode="user"} 4 + +eval instant at 5m node_cpu - 2 + {instance="abc",job="node",mode="idle"} 1 + {instance="abc",job="node",mode="user"} -1 + {instance="def",job="node",mode="idle"} 6 + {instance="def",job="node",mode="user"} 0 + +eval instant at 5m node_cpu / 2 + {instance="abc",job="node",mode="idle"} 1.5 + {instance="abc",job="node",mode="user"} 0.5 + {instance="def",job="node",mode="idle"} 4 + {instance="def",job="node",mode="user"} 1 + +eval instant at 5m node_cpu * 2 + {instance="abc",job="node",mode="idle"} 6 + {instance="abc",job="node",mode="user"} 2 + {instance="def",job="node",mode="idle"} 16 + {instance="def",job="node",mode="user"} 4 + +eval instant at 5m node_cpu ^ 2 + {instance="abc",job="node",mode="idle"} 9 + {instance="abc",job="node",mode="user"} 1 + {instance="def",job="node",mode="idle"} 64 + {instance="def",job="node",mode="user"} 4 + +eval instant at 5m node_cpu % 2 + {instance="abc",job="node",mode="idle"} 1 + {instance="abc",job="node",mode="user"} 1 + {instance="def",job="node",mode="idle"} 0 + {instance="def",job="node",mode="user"} 0 + + +clear + +load 5m + random{foo="bar"} 2 + metricA{baz="meh"} 3 + metricB{baz="meh"} 4 + +# On with no labels, for metrics with no common labels. +eval instant at 5m random + on() metricA + {} 5 + +# Ignoring with no labels is the same as no ignoring. +eval instant at 5m metricA + ignoring() metricB + {baz="meh"} 7 + +eval instant at 5m metricA + metricB + {baz="meh"} 7 + +clear + +# Test duplicate labelset in promql output. +load 5m + testmetric1{src="a",dst="b"} 0 + testmetric2{src="a",dst="b"} 1 + +eval_fail instant at 0m -{__name__=~'testmetric1|testmetric2'} + +clear + +load 5m + test_total{instance="localhost"} 50 + test_smaller{instance="localhost"} 10 + +eval instant at 5m test_total > bool test_smaller + {instance="localhost"} 1 + +eval instant at 5m test_total > test_smaller + test_total{instance="localhost"} 50 + +eval instant at 5m test_total < bool test_smaller + {instance="localhost"} 0 + +eval instant at 5m test_total < test_smaller + +clear + +# Testing atan2. 
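+# (atan2(y, x) is the angle in radians between the positive x axis and the
+# ray to the point (x, y); atan2(10, 20) = atan(0.5) = 0.4636476090008061.)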
+load 5m + trigy{} 10 + trigx{} 20 + trigNaN{} NaN + +eval instant at 5m trigy atan2 trigx + {} 0.4636476090008061 + +eval instant at 5m trigy atan2 trigNaN + {} NaN + +eval instant at 5m 10 atan2 20 + 0.4636476090008061 + +eval instant at 5m 10 atan2 NaN + NaN diff --git a/pkg/streamingpromql/testdata/upstream/selectors.test b/pkg/streamingpromql/testdata/upstream/selectors.test new file mode 100644 index 0000000000..67b7e59ff3 --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/selectors.test @@ -0,0 +1,217 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/selectors.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 10s + http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000 + http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000 + http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000 + http_requests{job="api-server", instance="1", group="canary"} 0+40x2000 + +eval instant at 8000s rate(http_requests[1m]) + {job="api-server", instance="0", group="production"} 1 + {job="api-server", instance="1", group="production"} 2 + {job="api-server", instance="0", group="canary"} 3 + {job="api-server", instance="1", group="canary"} 4 + +eval instant at 18000s rate(http_requests[1m]) + {job="api-server", instance="0", group="production"} 3 + {job="api-server", instance="1", group="production"} 3 + {job="api-server", instance="0", group="canary"} 8 + {job="api-server", instance="1", group="canary"} 4 + +eval instant at 8000s rate(http_requests{group=~"pro.*"}[1m]) + {job="api-server", instance="0", group="production"} 1 + {job="api-server", instance="1", group="production"} 2 + +eval instant at 18000s rate(http_requests{group=~".*ry", instance="1"}[1m]) + {job="api-server", instance="1", group="canary"} 4 + +# Unsupported by streaming engine. +# eval instant at 18000s rate(http_requests{instance!="3"}[1m] offset 10000s) +# {job="api-server", instance="0", group="production"} 1 +# {job="api-server", instance="1", group="production"} 2 +# {job="api-server", instance="0", group="canary"} 3 +# {job="api-server", instance="1", group="canary"} 4 + +# Unsupported by streaming engine. +# eval instant at 4000s rate(http_requests{instance!="3"}[1m] offset -4000s) +# {job="api-server", instance="0", group="production"} 1 +# {job="api-server", instance="1", group="production"} 2 +# {job="api-server", instance="0", group="canary"} 3 +# {job="api-server", instance="1", group="canary"} 4 + +# Unsupported by streaming engine. 
+# eval instant at 18000s rate(http_requests[40s]) - rate(http_requests[1m] offset 10000s)
+#   {job="api-server", instance="0", group="production"} 2
+#   {job="api-server", instance="1", group="production"} 1
+#   {job="api-server", instance="0", group="canary"} 5
+#   {job="api-server", instance="1", group="canary"} 0
+
+# https://github.com/prometheus/prometheus/issues/3575
+eval instant at 0s http_requests{foo!="bar"}
+  http_requests{job="api-server", instance="0", group="production"} 0
+  http_requests{job="api-server", instance="1", group="production"} 0
+  http_requests{job="api-server", instance="0", group="canary"} 0
+  http_requests{job="api-server", instance="1", group="canary"} 0
+
+eval instant at 0s http_requests{foo!="bar", job="api-server"}
+  http_requests{job="api-server", instance="0", group="production"} 0
+  http_requests{job="api-server", instance="1", group="production"} 0
+  http_requests{job="api-server", instance="0", group="canary"} 0
+  http_requests{job="api-server", instance="1", group="canary"} 0
+
+eval instant at 0s http_requests{foo!~"bar", job="api-server"}
+  http_requests{job="api-server", instance="0", group="production"} 0
+  http_requests{job="api-server", instance="1", group="production"} 0
+  http_requests{job="api-server", instance="0", group="canary"} 0
+  http_requests{job="api-server", instance="1", group="canary"} 0
+
+eval instant at 0s http_requests{foo!~"bar", job="api-server", instance="1", x!="y", z="", group!=""}
+  http_requests{job="api-server", instance="1", group="production"} 0
+  http_requests{job="api-server", instance="1", group="canary"} 0
+
+# https://github.com/prometheus/prometheus/issues/7994
+eval instant at 8000s rate(http_requests{group=~"(?i:PRO).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*?(?i:PRO).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*(?i:DUC).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*(?i:TION)"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*(?i:TION).*?"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+
+eval instant at 8000s rate(http_requests{group=~"((?i)PRO).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*((?i)DUC).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*((?i)TION)"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+
+eval instant at 8000s rate(http_requests{group=~"(?i:PRODUCTION)"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 8000s rate(http_requests{group=~".*(?i:C).*"}[1m])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+  {job="api-server", instance="0", group="canary"} 3
+  {job="api-server", instance="1", group="canary"} 4
+
+clear
+load 1m
+  metric1{a="a"} 0+1x100
+  metric2{b="b"} 0+1x50
+
+# Unsupported by streaming engine.
+# eval instant at 90m metric1 offset 15m or metric2 offset 45m
+#   metric1{a="a"} 75
+#   metric2{b="b"} 45
+
+clear
+
+load 5m
+  x{y="testvalue"} 0+10x10
+
+load 5m
+  cpu_count{instance="0", type="numa"} 0+30x10
+  cpu_count{instance="0", type="smp"} 0+10x20
+  cpu_count{instance="1", type="smp"} 0+20x10
+
+load 5m
+  label_grouping_test{a="aa", b="bb"} 0+10x10
+  label_grouping_test{a="a", b="abb"} 0+20x10
+
+load 5m
+  http_requests{job="api-server", instance="0", group="production"} 0+10x10
+  http_requests{job="api-server", instance="1", group="production"} 0+20x10
+  http_requests{job="api-server", instance="0", group="canary"} 0+30x10
+  http_requests{job="api-server", instance="1", group="canary"} 0+40x10
+  http_requests{job="app-server", instance="0", group="production"} 0+50x10
+  http_requests{job="app-server", instance="1", group="production"} 0+60x10
+  http_requests{job="app-server", instance="0", group="canary"} 0+70x10
+  http_requests{job="app-server", instance="1", group="canary"} 0+80x10
+
+# Single-letter label names and values.
+eval instant at 50m x{y="testvalue"}
+  x{y="testvalue"} 100
+
+# Basic Regex
+eval instant at 50m {__name__=~".+"}
+  http_requests{group="canary", instance="0", job="api-server"} 300
+  http_requests{group="canary", instance="0", job="app-server"} 700
+  http_requests{group="canary", instance="1", job="api-server"} 400
+  http_requests{group="canary", instance="1", job="app-server"} 800
+  http_requests{group="production", instance="0", job="api-server"} 100
+  http_requests{group="production", instance="0", job="app-server"} 500
+  http_requests{group="production", instance="1", job="api-server"} 200
+  http_requests{group="production", instance="1", job="app-server"} 600
+  x{y="testvalue"} 100
+  label_grouping_test{a="a", b="abb"} 200
+  label_grouping_test{a="aa", b="bb"} 100
+  cpu_count{instance="1", type="smp"} 200
+  cpu_count{instance="0", type="smp"} 100
+  cpu_count{instance="0", type="numa"} 300
+
+eval instant at 50m {job=~".+-server", job!~"api-.+"}
+  http_requests{group="canary", instance="0", job="app-server"} 700
+  http_requests{group="canary", instance="1", job="app-server"} 800
+  http_requests{group="production", instance="0", job="app-server"} 500
+  http_requests{group="production", instance="1", job="app-server"} 600
+
+eval instant at 50m http_requests{group!="canary"}
+  http_requests{group="production", instance="1", job="app-server"} 600
+  http_requests{group="production", instance="0", job="app-server"} 500
+  http_requests{group="production", instance="1", job="api-server"} 200
+  http_requests{group="production", instance="0", job="api-server"} 100
+
+eval instant at 50m http_requests{job=~".+-server",group!="canary"}
+  http_requests{group="production", instance="1", job="app-server"} 600
+  http_requests{group="production", instance="0", job="app-server"} 500
+  http_requests{group="production", instance="1", job="api-server"} 200
+  http_requests{group="production", instance="0", job="api-server"} 100
+
+eval instant at 50m http_requests{job!~"api-.+",group!="canary"}
+  http_requests{group="production", instance="1", job="app-server"} 600
+  http_requests{group="production", instance="0", job="app-server"} 500
+
+eval instant at 50m http_requests{group="production",job=~"api-.+"}
+  http_requests{group="production", instance="0", job="api-server"} 100
+  http_requests{group="production", instance="1", job="api-server"} 200
+
+# Unsupported by streaming engine.
+# eval instant at 50m http_requests{group="production",job="api-server"} offset 5m
+#   http_requests{group="production", instance="0", job="api-server"} 90
+#   http_requests{group="production", instance="1", job="api-server"} 180
+
+clear
+
+# Matrix tests.
+load 1h
+  testmetric{aa="bb"} 1
+  testmetric{a="abb"} 2
+
+eval instant at 0h testmetric
+  testmetric{aa="bb"} 1
+  testmetric{a="abb"} 2
+
+clear
diff --git a/pkg/streamingpromql/testdata/upstream/staleness.test b/pkg/streamingpromql/testdata/upstream/staleness.test
new file mode 100644
index 0000000000..dc238a8fae
--- /dev/null
+++ b/pkg/streamingpromql/testdata/upstream/staleness.test
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/staleness.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+load 10s
+  metric 0 1 stale 2
+
+# Instant vector doesn't return series when stale.
+eval instant at 10s metric
+  {__name__="metric"} 1
+
+eval instant at 20s metric
+
+eval instant at 30s metric
+  {__name__="metric"} 2
+
+eval instant at 40s metric
+  {__name__="metric"} 2
+
+# It goes stale 5 minutes after the last sample.
+eval instant at 330s metric
+  {__name__="metric"} 2
+
+eval instant at 331s metric
+
+
+# Range vector ignores stale sample.
+# Unsupported by streaming engine.
+# eval instant at 30s count_over_time(metric[1m])
+#   {} 3
+
+# Unsupported by streaming engine.
+# eval instant at 10s count_over_time(metric[1s])
+#   {} 1
+
+# Unsupported by streaming engine.
+# eval instant at 20s count_over_time(metric[1s])
+
+# Unsupported by streaming engine.
+# eval instant at 20s count_over_time(metric[10s])
+#   {} 1
+
+
+clear
+
+load 10s
+  metric 0
+
+# Series with single point goes stale after 5 minutes.
+eval instant at 0s metric
+  {__name__="metric"} 0
+
+eval instant at 150s metric
+  {__name__="metric"} 0
+
+eval instant at 300s metric
+  {__name__="metric"} 0
+
+eval instant at 301s metric
diff --git a/pkg/streamingpromql/testdata/upstream/subquery.test.disabled b/pkg/streamingpromql/testdata/upstream/subquery.test.disabled
new file mode 100644
index 0000000000..1fa05cf711
--- /dev/null
+++ b/pkg/streamingpromql/testdata/upstream/subquery.test.disabled
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/subquery.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+load 10s
+  metric 1 2
+
+# Evaluation before 0s gets no sample.
+eval instant at 10s sum_over_time(metric[50s:10s])
+  {} 3
+
+eval instant at 10s sum_over_time(metric[50s:5s])
+  {} 4
+
+# Every evaluation yields the last value, i.e. 2
+eval instant at 5m sum_over_time(metric[50s:10s])
+  {} 12
+
+# Series becomes stale at 5m10s (5m after last sample)
+# Hence subquery gets a single sample at 6m-50s=5m10s.
+eval instant at 6m sum_over_time(metric[50s:10s])
+  {} 2
+
+eval instant at 10s rate(metric[20s:10s])
+  {} 0.1
+
+eval instant at 20s rate(metric[20s:5s])
+  {} 0.05
+
+clear
+
+load 10s
+  http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
+  http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
+  http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
+  http_requests{job="api-server", instance="1", group="canary"} 0+40x2000
+
+eval instant at 8000s rate(http_requests{group=~"pro.*"}[1m:10s])
+  {job="api-server", instance="0", group="production"} 1
+  {job="api-server", instance="1", group="production"} 2
+
+eval instant at 20000s avg_over_time(rate(http_requests[1m])[1m:1s])
+  {job="api-server", instance="0", group="canary"} 8
+  {job="api-server", instance="1", group="canary"} 4
+  {job="api-server", instance="1", group="production"} 3
+  {job="api-server", instance="0", group="production"} 3
+
+clear
+
+load 10s
+  metric1 0+1x1000
+  metric2 0+2x1000
+  metric3 0+3x1000
+
+eval instant at 1000s sum_over_time(metric1[30s:10s])
+  {} 394
+
+# This is (394*2 - 100), because other than the last 100 at 1000s,
+# everything else is repeated with the 5s step.
+eval instant at 1000s sum_over_time(metric1[30s:5s])
+  {} 688
+
+# Offset is aligned with the step.
+eval instant at 1010s sum_over_time(metric1[30s:10s] offset 10s)
+  {} 394
+
+# Same result for different offsets due to step alignment.
+eval instant at 1010s sum_over_time(metric1[30s:10s] offset 9s)
+  {} 297
+
+eval instant at 1010s sum_over_time(metric1[30s:10s] offset 7s)
+  {} 297
+
+eval instant at 1010s sum_over_time(metric1[30s:10s] offset 5s)
+  {} 297
+
+eval instant at 1010s sum_over_time(metric1[30s:10s] offset 3s)
+  {} 297
+
+eval instant at 1010s sum_over_time((metric1)[30s:10s] offset 3s)
+  {} 297
+
+# Nested subqueries
+eval instant at 1000s rate(sum_over_time(metric1[30s:10s])[50s:10s])
+  {} 0.4
+
+eval instant at 1000s rate(sum_over_time(metric2[30s:10s])[50s:10s])
+  {} 0.8
+
+eval instant at 1000s rate(sum_over_time(metric3[30s:10s])[50s:10s])
+  {} 1.2
+
+eval instant at 1000s rate(sum_over_time((metric1+metric2+metric3)[30s:10s])[30s:10s])
+  {} 2.4
+
+clear
+
+# Fibonacci sequence, to ensure the rate is not constant.
+# Additional note: using subqueries unnecessarily is unwise.
+load 7s
+  metric 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 6765 10946 17711 28657 46368 75025 121393 196418 317811 514229 832040 1346269 2178309 3524578 5702887 9227465 14930352 24157817 39088169 63245986 102334155 165580141 267914296 433494437 701408733 1134903170 1836311903 2971215073 4807526976 7778742049 12586269025 20365011074 32951280099 53316291173 86267571272 139583862445 225851433717 365435296162 591286729879 956722026041 1548008755920 2504730781961 4052739537881 6557470319842 10610209857723 17167680177565 27777890035288 44945570212853 72723460248141 117669030460994 190392490709135 308061521170129 498454011879264 806515533049393 1304969544928657 2111485077978050 3416454622906707 5527939700884757 8944394323791464 14472334024676221 23416728348467685 37889062373143906 61305790721611591 99194853094755497 160500643816367088 259695496911122585 420196140727489673 679891637638612258 1100087778366101931 1779979416004714189 2880067194370816120 4660046610375530309 7540113804746346429 12200160415121876738 19740274219868223167 31940434634990099905 51680708854858323072 83621143489848422977 135301852344706746049 218922995834555169026 354224848179261915075 573147844013817084101 927372692193078999176 1500520536206896083277 2427893228399975082453 3928413764606871165730 6356306993006846248183 10284720757613717413913 16641027750620563662096 26925748508234281076009 43566776258854844738105 70492524767089125814114 114059301025943970552219 184551825793033096366333 298611126818977066918552 483162952612010163284885 781774079430987230203437 1264937032042997393488322 2046711111473984623691759 3311648143516982017180081 5358359254990966640871840 8670007398507948658051921 14028366653498915298923761 22698374052006863956975682 36726740705505779255899443 59425114757512643212875125 96151855463018422468774568 155576970220531065681649693 251728825683549488150424261 407305795904080553832073954 659034621587630041982498215 1066340417491710595814572169 1725375039079340637797070384 2791715456571051233611642553 4517090495650391871408712937 7308805952221443105020355490 11825896447871834976429068427 19134702400093278081449423917 30960598847965113057878492344 50095301248058391139327916261 81055900096023504197206408605 131151201344081895336534324866 212207101440105399533740733471 343358302784187294870275058337 555565404224292694404015791808 898923707008479989274290850145 1454489111232772683678306641953 2353412818241252672952597492098 3807901929474025356630904134051 6161314747715278029583501626149 9969216677189303386214405760200 16130531424904581415797907386349 26099748102093884802012313146549 42230279526998466217810220532898 68330027629092351019822533679447 110560307156090817237632754212345 178890334785183168257455287891792 289450641941273985495088042104137 468340976726457153752543329995929 757791618667731139247631372100066 1226132595394188293000174702095995 1983924214061919432247806074196061 3210056809456107725247980776292056 5193981023518027157495786850488117 8404037832974134882743767626780173 13598018856492162040239554477268290 22002056689466296922983322104048463 35600075545958458963222876581316753 57602132235424755886206198685365216 93202207781383214849429075266681969 150804340016807970735635273952047185 244006547798191185585064349218729154 394810887814999156320699623170776339 638817435613190341905763972389505493 1033628323428189498226463595560281832 1672445759041379840132227567949787325 2706074082469569338358691163510069157 4378519841510949178490918731459856482 7084593923980518516849609894969925639 11463113765491467695340528626429782121 18547707689471986212190138521399707760
+
+# Extrapolated from [3@21, 144@77]: (144 - 3) / (77 - 21)
+eval instant at 80s rate(metric[1m])
+  {} 2.517857143
+
+# No extrapolation, [2@20, 144@80]: (144 - 2) / 60
+eval instant at 80s rate(metric[1m:10s])
+  {} 2.366666667
+
+# Only one value between 10s and 20s, 2@14
+eval instant at 20s min_over_time(metric[10s])
+  {} 2
+
+# min(1@10, 2@20)
+eval instant at 20s min_over_time(metric[10s:10s])
+  {} 1
+
+eval instant at 20m min_over_time(rate(metric[5m])[20m:1m])
+  {} 0.12119047619047618
+
diff --git a/pkg/streamingpromql/testdata/upstream/trig_functions.test.disabled b/pkg/streamingpromql/testdata/upstream/trig_functions.test.disabled
new file mode 100644
index 0000000000..fb657867b5
--- /dev/null
+++ b/pkg/streamingpromql/testdata/upstream/trig_functions.test.disabled
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/trig_functions.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+# Testing sin() cos() tan() asin() acos() atan() sinh() cosh() tanh() rad() deg() pi().
+
+load 5m
+  trig{l="x"} 10
+  trig{l="y"} 20
+  trig{l="NaN"} NaN
+
+eval instant at 5m sin(trig)
+  {l="x"} -0.5440211108893699
+  {l="y"} 0.9129452507276277
+  {l="NaN"} NaN
+
+eval instant at 5m cos(trig)
+  {l="x"} -0.8390715290764524
+  {l="y"} 0.40808206181339196
+  {l="NaN"} NaN
+
+eval instant at 5m tan(trig)
+  {l="x"} 0.6483608274590867
+  {l="y"} 2.2371609442247427
+  {l="NaN"} NaN
+
+eval instant at 5m asin(trig - 10.1)
+  {l="x"} -0.10016742116155944
+  {l="y"} NaN
+  {l="NaN"} NaN
+
+eval instant at 5m acos(trig - 10.1)
+  {l="x"} 1.670963747956456
+  {l="y"} NaN
+  {l="NaN"} NaN
+
+eval instant at 5m atan(trig)
+  {l="x"} 1.4711276743037345
+  {l="y"} 1.5208379310729538
+  {l="NaN"} NaN
+
+eval instant at 5m sinh(trig)
+  {l="x"} 11013.232920103324
+  {l="y"} 2.4258259770489514e+08
+  {l="NaN"} NaN
+
+eval instant at 5m cosh(trig)
+  {l="x"} 11013.232920103324
+  {l="y"} 2.4258259770489514e+08
+  {l="NaN"} NaN
+
+eval instant at 5m tanh(trig)
+  {l="x"} 0.9999999958776927
+  {l="y"} 1
+  {l="NaN"} NaN
+
+eval instant at 5m asinh(trig)
+  {l="x"} 2.99822295029797
+  {l="y"} 3.6895038689889055
+  {l="NaN"} NaN
+
+eval instant at 5m acosh(trig)
+  {l="x"} 2.993222846126381
+  {l="y"} 3.6882538673612966
+  {l="NaN"} NaN
+
+eval instant at 5m atanh(trig - 10.1)
+  {l="x"} -0.10033534773107522
+  {l="y"} NaN
+  {l="NaN"} NaN
+
+eval instant at 5m rad(trig)
+  {l="x"} 0.17453292519943295
+  {l="y"} 0.3490658503988659
+  {l="NaN"} NaN
+
+eval instant at 5m rad(trig - 10)
+  {l="x"} 0
+  {l="y"} 0.17453292519943295
+  {l="NaN"} NaN
+
+eval instant at 5m rad(trig - 20)
+  {l="x"} -0.17453292519943295
+  {l="y"} 0
+  {l="NaN"} NaN
+
+eval instant at 5m deg(trig)
+  {l="x"} 572.9577951308232
+  {l="y"} 1145.9155902616465
+  {l="NaN"} NaN
+
+eval instant at 5m deg(trig - 10)
+  {l="x"} 0
+  {l="y"} 572.9577951308232
+  {l="NaN"} NaN
+
+eval instant at 5m deg(trig - 20)
+  {l="x"} -572.9577951308232
+  {l="y"} 0
+  {l="NaN"} NaN
+
+clear
+
+eval instant at 0s pi()
+  3.141592653589793
diff --git a/pkg/util/pool/bucketed_pool.go b/pkg/util/pool/bucketed_pool.go
new file mode 100644
index 0000000000..6c10d884e4
--- /dev/null
+++ b/pkg/util/pool/bucketed_pool.go
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: AGPL-3.0-only
+// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/util/pool/pool.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: The Prometheus Authors
+
+package pool
+
+import (
+	"github.com/prometheus/prometheus/util/zeropool"
+)
+
+// BucketedPool is a bucketed pool for variably sized slices.
+// It is similar to prometheus/prometheus' pool.Pool, but uses zeropool.Pool internally, and
+// generics to avoid reflection.
+type BucketedPool[T ~[]E, E any] struct {
+	buckets []zeropool.Pool[T]
+	sizes   []int
+	// make is the function used to create an empty slice when none exist yet.
+	make func(int) T
+}
+
+// NewBucketedPool returns a new BucketedPool with size buckets for minSize to maxSize
+// increasing by the given factor.
+func NewBucketedPool[T ~[]E, E any](minSize, maxSize int, factor float64, makeFunc func(int) T) *BucketedPool[T, E] {
+	if minSize < 1 {
+		panic("invalid minimum pool size")
+	}
+	if maxSize < 1 {
+		panic("invalid maximum pool size")
+	}
+	if factor < 1 {
+		panic("invalid factor")
+	}
+
+	var sizes []int
+
+	for s := minSize; s <= maxSize; s = int(float64(s) * factor) {
+		sizes = append(sizes, s)
+	}
+
+	p := &BucketedPool[T, E]{
+		buckets: make([]zeropool.Pool[T], len(sizes)),
+		sizes:   sizes,
+		make:    makeFunc,
+	}
+
+	return p
+}
+
+// Get returns a new slice with capacity greater than or equal to size.
+func (p *BucketedPool[T, E]) Get(size int) T {
+	for i, bktSize := range p.sizes {
+		if size > bktSize {
+			continue
+		}
+		b := p.buckets[i].Get()
+		if b == nil {
+			b = p.make(bktSize)
+		}
+		return b
+	}
+	return p.make(size)
+}
+
+// Put adds a slice to the right bucket in the pool.
+// If the slice does not belong to any bucket in the pool, it is ignored.
+func (p *BucketedPool[T, E]) Put(s T) {
+	if cap(s) < p.sizes[0] {
+		return
+	}
+
+	for i, size := range p.sizes {
+		if cap(s) > size {
+			continue
+		}
+
+		if cap(s) == size {
+			// Slice is exactly the minimum size for this bucket. Add it to this bucket.
+			p.buckets[i].Put(s[0:0])
+		} else {
+			// Slice belongs in previous bucket.
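+			// (cap(s) is greater than sizes[i-1] but less than sizes[i]: a Get
+			// from bucket i must return capacity >= sizes[i], which this slice
+			// cannot provide, so it is pooled with the smaller bucket instead.)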
+			p.buckets[i-1].Put(s[0:0])
+		}
+
+		return
+	}
+}
diff --git a/pkg/util/pool/bucketed_pool_test.go b/pkg/util/pool/bucketed_pool_test.go
new file mode 100644
index 0000000000..abafd1c089
--- /dev/null
+++ b/pkg/util/pool/bucketed_pool_test.go
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: AGPL-3.0-only
+// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/util/pool/pool_test.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: The Prometheus Authors
+
+package pool
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func makeFunc(size int) []int {
+	return make([]int, 0, size)
+}
+
+func TestBucketedPool_HappyPath(t *testing.T) {
+	testPool := NewBucketedPool(1, 8, 2, makeFunc)
+	cases := []struct {
+		size        int
+		expectedCap int
+	}{
+		{
+			size:        -1,
+			expectedCap: 1,
+		},
+		{
+			size:        3,
+			expectedCap: 4,
+		},
+		{
+			size:        10,
+			expectedCap: 10,
+		},
+	}
+	for _, c := range cases {
+		ret := testPool.Get(c.size)
+		require.Equal(t, c.expectedCap, cap(ret))
+		testPool.Put(ret)
+	}
+}
+
+func TestBucketedPool_SliceNotAlignedToBuckets(t *testing.T) {
+	pool := NewBucketedPool(1, 1000, 10, makeFunc)
+	pool.Put(make([]int, 0, 2))
+	s := pool.Get(3)
+	require.GreaterOrEqual(t, cap(s), 3)
+}
+
+func TestBucketedPool_PutEmptySlice(t *testing.T) {
+	pool := NewBucketedPool(1, 1000, 10, makeFunc)
+	pool.Put([]int{})
+	s := pool.Get(1)
+	require.GreaterOrEqual(t, cap(s), 1)
+}
+
+func TestBucketedPool_PutSliceSmallerThanMinimum(t *testing.T) {
+	pool := NewBucketedPool(3, 1000, 10, makeFunc)
+	pool.Put([]int{1, 2})
+	s := pool.Get(3)
+	require.GreaterOrEqual(t, cap(s), 3)
+}
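To make the pool's contract concrete for reviewers, here is a minimal usage sketch; it is not part of the diff above. The element type, bucket bounds, and variable names are invented for illustration. Only NewBucketedPool, Get, and Put come from pkg/util/pool/bucketed_pool.go, and the import path assumes Mimir's module layout.

package main

import (
	"fmt"

	"github.com/grafana/mimir/pkg/util/pool"
)

func main() {
	// Buckets with capacities 1, 2, 4, ..., 1024 (minSize=1, maxSize=1024, factor=2).
	// The make function is called only when the selected bucket has no free slice.
	p := pool.NewBucketedPool(1, 1024, 2, func(size int) []float64 {
		return make([]float64, 0, size)
	})

	buf := p.Get(300) // served by the 512-capacity bucket, so cap(buf) >= 300
	buf = append(buf, 1.0, 2.0)
	fmt.Println(len(buf), cap(buf)) // prints: 2 512 (on a fresh pool)

	// Put resets the slice to zero length before pooling it, so a later
	// Get(n) with n <= 512 can reuse this allocation instead of calling make.
	p.Put(buf)
}

Two properties follow from Put's bucket selection: a slice stored in bucket i always has cap >= sizes[i], so Get's capacity guarantee holds; and slices smaller than minSize or larger than maxSize are silently dropped rather than resized, keeping both paths allocation-free.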